From a3732cff1ec7b5829a9e8a49b3d599fe0b67a61d Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Sun, 3 May 2020 11:42:33 +0200
Subject: [PATCH] testing kta

---
 src/main.py              | 20 +++++++++++++-------
 src/model/classifiers.py | 27 +++++++++++++++++++++------
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/main.py b/src/main.py
index d6cd2e2..4832c1f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -9,7 +9,6 @@ import torch
 from model.transformations import CNNProjection
 import sys
 
-
 hidden_size=32
 channels_out=128
 output_size=1024
@@ -18,12 +17,19 @@ pad_length=3000
 batch_size=50
 n_epochs=256
 bigrams=False
+n_authors=-1
+docs_by_author=-1
 
-#hidden_size=16
-#output_size=32
-#pad_length=100
-#batch_size=10
-#n_epochs=20
+debug=False
+if debug:
+    print(('*'*20)+' DEBUG MODE ' + ('*'*20))
+    hidden_size=16
+    output_size=32
+    pad_length=100
+    batch_size=10
+    n_epochs=20
+    n_authors = 5
+    docs_by_author = 10
 
 if torch.cuda.is_available():
     device = torch.device('cuda')
@@ -32,7 +38,7 @@ else:
 print(f'running on {device}')
 
 #dataset = Victorian(data_path='../../authorship_analysis/data/victoria', n_authors=5, docs_by_author=25)
-dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=-1, docs_by_author=-1)
+dataset = Imdb62(data_path='../../authorship_analysis/data/imdb62/imdb62.txt', n_authors=n_authors, docs_by_author=docs_by_author)
 Xtr, ytr = dataset.train.data, dataset.train.target
 Xte, yte = dataset.test.data, dataset.test.target
 A = np.unique(ytr)
diff --git a/src/model/classifiers.py b/src/model/classifiers.py
index de31070..a57defe 100644
--- a/src/model/classifiers.py
+++ b/src/model/classifiers.py
@@ -18,8 +18,8 @@ class AuthorshipAttributionClassifier(nn.Module):
         self.device = device
 
     def fit(self, X, y, batch_size, epochs, lr=0.001, val_prop=0.1, log='../log/tmp.csv'):
-        #batcher = Batch(batch_size=batch_size, n_epochs=epochs)
-        batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=X.shape[0]//batch_size)
+        batcher = Batch(batch_size=batch_size, n_epochs=epochs)
+        #batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=X.shape[0]//batch_size)
         batcher_val = Batch(batch_size=batch_size, n_epochs=epochs, shuffle=False)
         criterion = torch.nn.CrossEntropyLoss().to(self.device)
         optim = torch.optim.Adam(self.parameters(), lr=lr)
@@ -33,17 +33,32 @@ class AuthorshipAttributionClassifier(nn.Module):
         for epoch in pbar:
             # training
             self.train()
-            losses = []
+            losses, attr_losses, sav_losses = [], [], []
             for xi, yi in batcher.epoch(X, y):
                 optim.zero_grad()
                 xi = self.padder.transform(xi)
-                logits = self.forward(xi)
-                loss = criterion(logits, torch.as_tensor(yi).to(self.device))
+                phi = self.projector(xi)
+
+                logits = self.ff(phi)
+                loss_attr = criterion(logits, torch.as_tensor(yi).to(self.device))
+
+                kernel = torch.matmul(phi, phi.T)
+                ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
+                loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
+
+                loss = loss_attr + loss_sav
+
                 loss.backward()
                 optim.step()
+
+                attr_losses.append(loss_attr.item())
+                sav_losses.append(loss_sav.item())
                 losses.append(loss.item())
             tr_loss = np.mean(losses)
-            pbar.set_description(f'training epoch={epoch} loss={tr_loss:.5f} val_loss={val_loss:.5f}')
+            pbar.set_description(f'training epoch={epoch} '
+                                 f'loss={tr_loss:.5f} '
+                                 f'attr-loss={np.mean(attr_losses):.5f} '
+                                 f'sav-loss={np.mean(sav_losses):.5f} val_loss={val_loss:.5f}')
 
             # validation
             self.eval()
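
Note (not part of the patch): KernelAlignmentLoss is called in the second
classifiers.py hunk but is not defined anywhere in this diff. A minimal
sketch of what it plausibly computes, assuming it implements the standard
kernel-target alignment of Cristianini et al. ("On Kernel-Target Alignment")
between the batch kernel K = phi @ phi.T and the ideal same-author kernel,
returned as 1 - alignment so that a perfectly aligned batch scores 0 (by
Cauchy-Schwarz the alignment itself lies in [-1, 1]):

    import torch

    def KernelAlignmentLoss(K, K_ideal):
        # alignment = <K, K_ideal>_F / (||K||_F * ||K_ideal||_F):
        # the Frobenius inner product of the two kernel matrices,
        # normalized by the product of their Frobenius norms
        K_ideal = K_ideal.float()
        alignment = (K * K_ideal).sum() / (K.norm(p='fro') * K_ideal.norm(p='fro'))
        return 1 - alignment

Consistent with the patch, this is invoked as a plain function,
loss_sav = KernelAlignmentLoss(kernel, ideal_kernel), rather than as an
nn.Module.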
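
The ideal_kernel expression encodes label equality: (1 + y_i) * (1 / (1 + y_j))
equals 1 exactly when y_i == y_j, so np.outer(1 + yi, 1 / (yi + 1)) == 1 yields
the same-author indicator matrix. A quick check with a hypothetical
three-document batch:

    >>> import numpy as np
    >>> yi = np.array([0, 1, 0])
    >>> 1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)
    array([[1, 0, 1],
           [0, 1, 0],
           [1, 0, 1]])

One caveat: the test rides on floating-point division, and n * (1 / n) != 1.0
in double precision for some integers (e.g. n = 49), so with the 62 author
labels of Imdb62 the exact comparison np.equal.outer(yi, yi) would be a safer
equivalent.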