From 8c70e61bbb0106d77cda23d2c53150cf6d71e73c Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Wed, 29 Apr 2020 09:58:38 +0200 Subject: [PATCH] cnn enabled --- src/main.py | 49 +++--- src/model/cnn.py | 48 ++++++ src/model/model.py | 330 ++++++++++++++++++++++++++++++++++++ src/model/transformation.py | 0 4 files changed, 405 insertions(+), 22 deletions(-) create mode 100644 src/model/cnn.py create mode 100644 src/model/model.py create mode 100644 src/model/transformation.py diff --git a/src/main.py b/src/main.py index 9082ffa..dd1f443 100644 --- a/src/main.py +++ b/src/main.py @@ -1,10 +1,12 @@ import numpy as np from index import Index -from model import RNNProjection, AuthorshipAttributionClassifier, Batch, SameAuthorClassifier, FullAuthorClassifier +from model.model import RNNProjection, AuthorshipAttributionClassifier, SameAuthorClassifier, FullAuthorClassifier from data.fetch_victorian import Victorian from evaluation import eval import torch +from model.cnn import CNNProjection + if torch.cuda.is_available(): device = torch.device('cuda') else: @@ -41,41 +43,44 @@ x1, y1 = Xte[shuffle1], yte[shuffle1] x2, y2 = Xte[shuffle2], yte[shuffle2] paired_y = y1==y2 -hidden_size=64 -output_size=128 +hidden_size=128 +channels_out=128 +output_size=1024 +kernel_sizes=[3,5,7,11,13] pad_length=1000 -batch_size=50 -n_epochs=10 - +batch_size=64 +n_epochs=256 +""" hidden_size=16 output_size=32 pad_length=100 batch_size=10 n_epochs=2 - +""" # attribution print('Attribution') -phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) +#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) +phi = CNNProjection(vocabulary_size=index.vocabulary_size(), embedding_dim=hidden_size, out_size=output_size, channels_out=channels_out, kernel_sizes=kernel_sizes, dropout=0.5).to(device) cls = AuthorshipAttributionClassifier(phi, num_authors=A.size, 
pad_index=pad_index, pad_length=pad_length, device=device) cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs) yte_ = cls.predict(Xte) eval(yte, yte_) # verification -print('Verification') -phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) -cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device) -cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs) -paired_y_ = cls.predict(x1,x2) -eval(paired_y, paired_y_) +#print('Verification') +#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) +#cls = SameAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device) +#cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs) +#paired_y_ = cls.predict(x1,x2) +#eval(paired_y, paired_y_) # attribution & verification -print('Attribution & Verification') -phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) -cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device) -cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs) -yte_ = cls.predict_labels(Xte) -eval(yte, yte_) -paired_y_ = cls.predict_sav(x1,x2) -eval(paired_y, paired_y_) +#print('Attribution & Verification') +#phi = RNNProjection(vocab_size=index.vocabulary_size(), hidden_size=hidden_size, output_size=output_size, device=device) +#cls = FullAuthorClassifier(phi, num_authors=A.size, pad_index=pad_index, pad_length=pad_length, device=device) +#cls.fit(Xtr, ytr, batch_size=batch_size, epochs=n_epochs) +#yte_ = cls.predict_labels(Xte) +#eval(yte, yte_) +#paired_y_ = cls.predict_sav(x1,x2) +#eval(paired_y, paired_y_) diff --git a/src/model/cnn.py b/src/model/cnn.py new file mode 100644 index 0000000..56cea80 --- /dev/null +++ b/src/model/cnn.py @@ -0,0 +1,48 @@ +# 
# adapted from https://github.com/Shawn1993/cnn-text-classification-pytorch/blob/master/model.py
#
# This span of the patch introduces two modules, laid out one after the other:
#   * src/model/cnn.py    -> CNNProjection
#   * src/model/model.py  -> classifiers, losses, projections, batching, padding
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

try:
    from tqdm import tqdm
except ImportError:
    class tqdm:
        """Silent stand-in for tqdm; progress bars here are purely cosmetic."""

        def __init__(self, iterable, desc=None):
            self._iterable = iterable

        def __iter__(self):
            return iter(self._iterable)

        def set_description(self, desc):
            pass


# --------------------------------------------------------------------------- #
# src/model/cnn.py
# --------------------------------------------------------------------------- #
class CNNProjection(nn.Module):
    """Convolutional document projector.

    Token indexes are embedded, convolved with one ``Conv2d`` per kernel size
    (each kernel spans the full embedding width), max-pooled over the sequence
    axis, concatenated, passed through dropout and linearly mapped to
    ``out_size`` dimensions.

    :param vocabulary_size: number of rows of the embedding table
    :param embedding_dim: size of each token embedding
    :param out_size: dimensionality of the returned representation
    :param channels_out: number of feature maps per kernel size
    :param kernel_sizes: iterable with the heights (in tokens) of the kernels
    :param dropout: dropout probability applied before the output layer
    """

    def __init__(self, vocabulary_size, embedding_dim, out_size, channels_out, kernel_sizes, dropout=0.5):
        super(CNNProjection, self).__init__()
        kernel_sizes = list(kernel_sizes)
        channels_in = 1
        self.embed = nn.Embedding(vocabulary_size, embedding_dim)
        self.convs1 = nn.ModuleList(
            [nn.Conv2d(channels_in, channels_out, (K, embedding_dim)) for K in kernel_sizes]
        )
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(kernel_sizes) * channels_out, out_size)
        self.output_size = out_size
        # a sequence shorter than the tallest kernel cannot be convolved
        self._min_width = max(kernel_sizes, default=1)

    def conv_and_pool(self, x, conv):
        """Apply ``conv`` + ReLU and max-pool over the whole sequence axis."""
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W')
        x = F.max_pool1d(x, x.size(2)).squeeze(2)  # (N, Co)
        return x

    def forward(self, x):
        """Project a batch of index sequences.

        :param x: integer tensor of token indexes, indexed as (N, W)
        :return: tensor of shape (N, out_size)
        """
        x = self.embed(x)  # (N, W, D)
        deficit = self._min_width - x.size(1)
        if deficit > 0:
            # Dynamic padding can yield batches shorter than the tallest
            # kernel; right-pad the embedded sequence with zeros so that every
            # convolution has at least one valid position.
            x = F.pad(x, (0, 0, 0, deficit))
        x = x.unsqueeze(1)  # (N, Ci, W, D)
        pooled = [self.conv_and_pool(x, conv) for conv in self.convs1]  # [(N, Co)] * len(Ks)
        x = torch.cat(pooled, 1)  # (N, len(Ks)*Co)
        x = self.dropout(x)
        return self.fc1(x)  # (N, out_size)

    def space_dimensions(self):
        """Return the dimensionality of the output space."""
        return self.output_size


# --------------------------------------------------------------------------- #
# src/model/model.py
# --------------------------------------------------------------------------- #
def tensor2numpy(t, device):
    """Return ``t`` as a NumPy array, detached from the graph.

    :param t: a torch tensor living on any device
    :param device: kept for backward compatibility; the tensor is always
        copied to host memory, which is a no-op for CPU tensors
    """
    return t.detach().cpu().numpy()


def same_author_kernel(y):
    """Return the boolean matrix ``M[i, j] = (y[i] == y[j])`` for labels ``y``.

    Exact label comparison is used on purpose: a floating-point formulation
    such as ``(1 + y_i) * (1 / (1 + y_j)) == 1`` is not reliable (for
    instance ``49 * (1 / 49) != 1.0``).
    """
    y = np.asarray(y)
    return y[:, None] == y[None, :]


def pairwise_inner(phi_x, phi_z):
    """Row-wise inner products of two (rows, cols) tensors, shape (rows,).

    The result keeps its batch axis even when ``rows == 1``.
    """
    rows, cols = phi_x.shape
    return torch.bmm(phi_x.view(rows, 1, cols), phi_z.view(rows, cols, 1)).view(rows)


def _pad_to_tensor(padder, docs, device):
    """Pad ``docs`` with ``padder`` and return a long tensor on ``device``."""
    return torch.as_tensor(padder.transform(docs), dtype=torch.long, device=device)


def _concat(chunks, dtype):
    """Concatenate per-batch predictions; an empty list yields an empty array."""
    return np.concatenate(chunks) if chunks else np.empty(0, dtype=dtype)


class AuthorshipAttributionClassifier(nn.Module):
    """Authorship attribution: projector followed by a feed-forward classifier.

    :param projector: module mapping index tensors to document vectors; it must
        expose ``space_dimensions()``
    :param num_authors: number of output classes
    :param pad_index: index of the PAD token
    :param pad_length: maximum document length (longer documents are truncated)
    :param device: device on which the model and the batches are placed
    """

    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(AuthorshipAttributionClassifier, self).__init__()
        self.projector = projector.to(device)
        self.ff = FFProjection(input_size=projector.space_dimensions(),
                               hidden_sizes=[1024],
                               output_size=num_authors).to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001):
        """Train with Adam and cross-entropy on documents ``X`` with labels ``y``."""
        self.train()
        batcher = Batch(batch_size=batch_size, n_epochs=epochs)
        criterion = torch.nn.CrossEntropyLoss().to(self.device)
        optim = torch.optim.Adam(self.parameters(), lr=lr)

        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses = []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                logits = self.forward(_pad_to_tensor(self.padder, xi, self.device))
                # CrossEntropyLoss requires int64 targets
                target = torch.as_tensor(yi, dtype=torch.long, device=self.device)
                loss = criterion(logits, target)
                loss.backward()
                optim.step()
                losses.append(loss.item())
                pbar.set_description(f'training epoch={epoch} loss={np.mean(losses):.5f}')

    def predict(self, x, batch_size=100):
        """Return the predicted author index for every document in ``x``."""
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        with torch.no_grad():  # inference only: do not build autograd graphs
            for xi in tqdm(batcher.epoch(x), desc='test'):
                logits = self.forward(_pad_to_tensor(self.padder, xi, self.device))
                predictions.append(tensor2numpy(torch.argmax(logits, dim=1).view(-1), self.device))
        return _concat(predictions, dtype=int)

    def forward(self, x):
        """Return the class logits for a batch of padded index tensors."""
        phi = self.projector(x)
        return self.ff(phi)


class SameAuthorClassifier(nn.Module):
    """Same-author verification trained by aligning the projection kernel.

    The projector is trained so that the inner product of two document vectors
    approximates 1 for documents by the same author and 0 otherwise.

    :param projector: module mapping index tensors to document vectors
    :param num_authors: unused here; kept for signature parity with the
        other classifiers
    :param pad_index: index of the PAD token
    :param pad_length: maximum document length
    :param device: device on which the model and the batches are placed
    """

    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(SameAuthorClassifier, self).__init__()
        self.projector = projector.to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001, steps_per_epoch=100):
        """Train the projector on two-class batches with the kernel alignment loss."""
        self.train()
        batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=steps_per_epoch)
        optim = torch.optim.Adam(self.parameters(), lr=lr)

        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses = []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                phi = self.projector(_pad_to_tensor(self.padder, xi, self.device))
                # normalize phi to have norm 1? maybe better as the last step of projector
                kernel = torch.matmul(phi, phi.T)
                ideal_kernel = torch.as_tensor(same_author_kernel(yi), dtype=kernel.dtype, device=self.device)
                loss = KernelAlignmentLoss(kernel, ideal_kernel)
                loss.backward()
                optim.step()
                losses.append(loss.item())
                pbar.set_description(f'training epoch={epoch} loss={np.mean(losses):.5f}')

    def predict(self, x, z, batch_size=100):
        """Return a boolean array: True where ``x[i]`` and ``z[i]`` are judged same-author."""
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        with torch.no_grad():
            for xi, zi in tqdm(batcher.epoch(x, z), desc='test'):
                inners = self.forward(self.padder.transform(xi), self.padder.transform(zi))
                # NOTE(review): open question from the original author -- should the
                # threshold be 0 with an ideal kernel in {-1,+1} instead of 0.5 with {0,1}?
                predictions.append(tensor2numpy(inners, device=self.device) > 0.5)
        return _concat(predictions, dtype=bool)

    def forward(self, x, z):
        """Return the inner product of the projections of ``x[i]`` and ``z[i]``.

        ``x`` and ``z`` are padded independently, so they may differ in
        sequence length; only the number of documents has to match.
        """
        x = torch.as_tensor(x, dtype=torch.long, device=self.device)
        z = torch.as_tensor(z, dtype=torch.long, device=self.device)
        assert x.shape[0] == z.shape[0], 'shape mismatch between matrices x and z'
        return pairwise_inner(self.projector(x), self.projector(z))


class FullAuthorClassifier(nn.Module):
    """Joint attribution + verification model sharing a single projector.

    :param projector: module mapping index tensors to document vectors; it must
        expose ``space_dimensions()``
    :param num_authors: number of output classes of the attribution head
    :param pad_index: index of the PAD token
    :param pad_length: maximum document length
    :param device: device on which the model and the batches are placed
    """

    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(FullAuthorClassifier, self).__init__()
        self.projector = projector.to(device)
        self.ff = FFProjection(input_size=projector.space_dimensions(),
                               hidden_sizes=[1024],
                               output_size=num_authors).to(device)
        self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False)
        self.device = device

    def fit(self, X, y, batch_size, epochs, lr=0.001, steps_per_epoch=100, alpha=0.5):
        """Train on ``alpha * sav_loss + (1 - alpha) * attr_loss``.

        :param alpha: weight of the same-author (kernel alignment) loss; the
            attribution (cross-entropy) loss is weighted by ``1 - alpha``
        """
        self.train()
        batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=steps_per_epoch)
        criterion = torch.nn.CrossEntropyLoss().to(self.device)
        optim = torch.optim.Adam(self.parameters(), lr=lr)

        pbar = tqdm(range(batcher.n_epochs))
        for epoch in pbar:
            losses, sav_losses, attr_losses = [], [], []
            for xi, yi in batcher.epoch(X, y):
                optim.zero_grad()
                phi = self.projector(_pad_to_tensor(self.padder, xi, self.device))
                # normalize phi to have norm 1? maybe better as the last step of projector

                # sav-loss
                kernel = torch.matmul(phi, phi.T)
                ideal_kernel = torch.as_tensor(same_author_kernel(yi), dtype=kernel.dtype, device=self.device)
                sav_loss = KernelAlignmentLoss(kernel, ideal_kernel)
                sav_losses.append(sav_loss.item())

                # attr-loss
                logits = self.ff(phi)
                target = torch.as_tensor(yi, dtype=torch.long, device=self.device)
                attr_loss = criterion(logits, target)
                attr_losses.append(attr_loss.item())

                # combined loss
                loss = alpha * sav_loss + (1 - alpha) * attr_loss
                losses.append(loss.item())

                loss.backward()
                optim.step()
                pbar.set_description(
                    f'training epoch={epoch} '
                    f'sav-loss={np.mean(sav_losses):.5f} '
                    f'attr-loss={np.mean(attr_losses):.5f} '
                    f'loss={np.mean(losses):.5f}'
                )

    def predict_sav(self, x, z, batch_size=100):
        """Return a boolean array: True where ``x[i]`` and ``z[i]`` are judged same-author."""
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        with torch.no_grad():
            for xi, zi in tqdm(batcher.epoch(x, z), desc='test'):
                phi_xi = self.projector(_pad_to_tensor(self.padder, xi, self.device))
                phi_zi = self.projector(_pad_to_tensor(self.padder, zi, self.device))
                inners = pairwise_inner(phi_xi, phi_zi)
                # NOTE(review): open question from the original author -- should the
                # threshold be 0 with an ideal kernel in {-1,+1} instead of 0.5 with {0,1}?
                predictions.append(tensor2numpy(inners, device=self.device) > 0.5)
        return _concat(predictions, dtype=bool)

    def predict_labels(self, x, batch_size=100):
        """Return the predicted author index for every document in ``x``."""
        self.eval()
        batcher = Batch(batch_size=batch_size, n_epochs=1, shuffle=False)
        predictions = []
        with torch.no_grad():
            for xi in tqdm(batcher.epoch(x), desc='test'):
                phi = self.projector(_pad_to_tensor(self.padder, xi, self.device))
                logits = self.ff(phi)
                predictions.append(tensor2numpy(torch.argmax(logits, dim=1).view(-1), device=self.device))
        return _concat(predictions, dtype=int)


def KernelAlignmentLoss(K, Y):
    """Frobenius distance between kernel ``K`` and target ``Y`` divided by the number of entries.

    :param K: 2-D float tensor (the empirical kernel)
    :param Y: target kernel of the same shape; cast to the dtype and device of ``K``
    :return: scalar tensor
    """
    Y = torch.as_tensor(Y, dtype=K.dtype, device=K.device)
    n_el = K.shape[0] * K.shape[1]
    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
    # divide by the number of entries to factor out growth that is only due to the batch size
    return loss / n_el


class FFProjection(nn.Module):
    """Feed-forward network: linear layers with activation + dropout between them.

    The last linear layer is applied without activation or dropout.

    :param input_size: dimensionality of the input
    :param hidden_sizes: list with the size of every hidden layer
    :param output_size: dimensionality of the output
    :param activation: callable applied after every hidden layer
    :param dropout: dropout probability applied after every hidden activation
    """

    def __init__(self, input_size, hidden_sizes, output_size, activation=nn.functional.relu, dropout=0.5):
        super(FFProjection, self).__init__()
        sizes = [input_size] + list(hidden_sizes) + [output_size]
        self.ff = nn.ModuleList([
            nn.Linear(n_in, n_out) for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ])
        self.activation = activation
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        for linear in self.ff[:-1]:
            x = self.dropout(self.activation(linear(x)))
        return self.ff[-1](x)


class RNNProjection(nn.Module):
    """GRU document projector: embedding -> GRU -> linear map of the final hidden state.

    :param vocab_size: number of rows of the embedding table
    :param hidden_size: size of the embeddings and of the GRU hidden state
    :param output_size: dimensionality of the returned representation
    :param device: device on which the sub-modules and the inputs are placed
    """

    def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
        super(RNNProjection, self).__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.num_layers = 1
        self.num_directions = 1
        self.device = device

        self.embedding = nn.Embedding(vocab_size, hidden_size).to(device)
        self.rnn = nn.GRU(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=self.num_layers,
            bidirectional=(self.num_directions == 2),
            batch_first=True
        ).to(device)
        self.projection = nn.Linear(self.num_layers * self.num_directions * self.hidden_size, output_size).to(device)

    def init_hidden(self, batch_size):
        """Return an all-zeros initial hidden state for ``batch_size`` sequences."""
        return torch.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size).to(self.device)

    def forward(self, input):
        """Project a batch of index sequences (array-like or tensor) to ``output_size`` dims."""
        x = torch.as_tensor(input).to(self.device)
        batch_size = x.shape[0]
        x = self.embedding(x)
        _, hn = self.rnn(x, self.init_hidden(batch_size))
        # (layers*directions, N, H) -> (N, layers*directions*H)
        hn = hn.view(self.num_layers, self.num_directions, batch_size, self.hidden_size)
        hn = hn.permute(2, 0, 1, 3).reshape(batch_size, -1)
        return self.projection(hn)

    def space_dimensions(self):
        """Return the dimensionality of the output space."""
        return self.output_size


class Batch:
    """Iterate over aligned sequences in consecutive mini-batches.

    :param batch_size: maximum number of elements per batch
    :param n_epochs: number of epochs; stored for the caller, not used internally
    :param shuffle: if True, a fresh random permutation is applied at every epoch
        (the sequences must then support NumPy fancy indexing)
    """

    def __init__(self, batch_size, n_epochs, shuffle=True):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.shuffle = shuffle
        self.current_epoch = 0

    def epoch(self, *args):
        """Yield one epoch of batches.

        With a single sequence each batch is a slice of it; with several
        sequences each batch is a list holding one slice per sequence.
        """
        lengths = list(map(len, args))
        assert max(lengths) == min(lengths), 'inconsistent sizes in args'
        n_items = lengths[0]
        if self.shuffle:
            index = np.random.permutation(n_items)
            args = [arg[index] for arg in args]
        for offset in range(0, n_items, self.batch_size):
            batch_idx = slice(offset, offset + self.batch_size)
            batch = [arg[batch_idx] for arg in args]
            yield batch if len(batch) > 1 else batch[0]
        self.current_epoch += 1


class TwoClassBatch:
    """
    Given X and y (one class label per element), produces batches of elements of X, y drawn from two
    distinct classes (e.g., c1, c2) in equal number, i.e., the batch is
    [(x1,c1), ..., (xn,c1), (xn+1,c2), ..., (x2n,c2)]. Elements are sampled with replacement, and the
    two classes are sampled proportionally to their prevalence.
    """

    def __init__(self, batch_size, n_epochs, steps_per_epoch):
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.steps_per_epoch = steps_per_epoch
        self.current_epoch = 0
        if self.batch_size % 2 != 0:
            raise ValueError('warning, batch size is not even')

    def epoch(self, X, y):
        """Yield ``steps_per_epoch`` pairs ``(X_batch, y_batch)``.

        :param X: NumPy array of examples, indexed along its first axis
        :param y: class label of every example
        :raises ValueError: if ``y`` contains fewer than two distinct classes
        """
        y = np.asarray(y)
        n_el = len(y)
        assert len(X) == n_el, 'inconsistent sizes in X, y'
        classes = np.unique(y)
        if len(classes) < 2:
            raise ValueError('at least two distinct classes are required')
        groups = {ci: X[y == ci] for ci in classes}
        class_prevalences = [len(groups[ci]) / n_el for ci in classes]
        n_choices = self.batch_size // 2

        for _ in range(self.steps_per_epoch):
            class1, class2 = np.random.choice(classes, p=class_prevalences, size=2, replace=False)
            halves = []
            for ci in (class1, class2):
                group = groups[ci]
                # sample row indexes rather than rows, so X may have any number of dimensions
                halves.append(group[np.random.choice(len(group), size=n_choices)])
            X_batch = np.concatenate(halves)
            y_batch = np.repeat([class1, class2], repeats=[n_choices, n_choices])
            yield X_batch, y_batch
        self.current_epoch += 1


class Padding:
    def __init__(self, pad_index, max_length, dynamic=True, pad_at_end=True):
        """
        :param pad_index: the index representing the PAD token
        :param max_length: the length that defines the padding
        :param dynamic: if True (default) pads at min(max_length, max_local_length) where max_local_length is the
            length of the longest example
        :param pad_at_end: if True, the pad tokens are added at the end of the lists, if otherwise they are added
            at the beginning
        """
        self.pad = pad_index
        self.max_length = max_length
        self.dynamic = dynamic
        self.pad_at_end = pad_at_end

    def transform(self, X):
        """
        :param X: a sequence of sequences of indexes (integers); lists and 1-D arrays are both accepted
        :return: a ndarray of shape (n,m) where n is the number of elements in X and m is the pad length (the maximum
            in elements of X, capped at self.max_length, if dynamic, or self.max_length if otherwise)
        """
        # list(...) guarantees that "+" below concatenates instead of adding element-wise
        docs = [list(x[:self.max_length]) for x in X]
        lengths = [len(doc) for doc in docs]
        if self.dynamic:
            pad_length = min(max(lengths, default=0), self.max_length)
        else:
            pad_length = self.max_length
        if not docs:
            return np.empty((0, pad_length), dtype=int)
        if self.pad_at_end:
            padded = [doc + [self.pad] * (pad_length - n) for doc, n in zip(docs, lengths)]
        else:
            padded = [[self.pad] * (pad_length - n) + doc for doc, n in zip(docs, lengths)]
        return np.asarray(padded, dtype=int)