From ced842a2199619961ec5ec0ccc7e85e1bf7113be Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Thu, 9 Jul 2020 17:59:49 +0200
Subject: [PATCH] phi l2 normalization outside the model, just before the KTA

Phi chains CNNProjection and FFProjection. Its output is L2-normalized in
AuthorshipAttributionClassifier, right before the kernel is computed, so
main.py builds Phi without a norm layer; `norm` is therefore optional.
L2Norm delegates to F.normalize so that all-zero rows do not produce NaN.
---
 Note: the index hashes and the similarity index below were not
 recomputed after the `+` lines were edited by hand.

 src/main.py                                 | 24 ++++++----
 src/model/classifiers.py                    | 13 +++---
 src/model/{transformations.py => layers.py} | 61 ++++++++++++++++--------
 3 files changed, 63 insertions(+), 35 deletions(-)
 rename src/model/{transformations.py => layers.py} (78%)

diff --git a/src/main.py b/src/main.py
index 8685737..756d6a2 100644
--- a/src/main.py
+++ b/src/main.py
@@ -9,7 +9,8 @@ from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassif
 from data.fetch_victorian import Victorian
 from evaluation import evaluation
 import torch
-from model.transformations import CNNProjection
+import torch.nn as nn
+from model.layers import CNNProjection, FFProjection, Phi
 from util import create_path_if_not_exists
 import os
 import sys
@@ -68,15 +69,20 @@ def main(opt):
 
     # attribution
     print('Attribution')
-    phi = CNNProjection(
-        vocabulary_size=index.vocabulary_size(),
-        embedding_dim=opt.hidden,
-        out_size=opt.repr,
-        channels_out=opt.chout,
-        kernel_sizes=opt.kernelsizes,
-        dropout=0.5
+    phi = Phi(
+        cnn=CNNProjection(
+            vocabulary_size=index.vocabulary_size(),
+            embedding_dim=opt.hidden,
+            channels_out=opt.chout,
+            kernel_sizes=opt.kernelsizes),
+        ff=FFProjection(input_size=len(opt.kernelsizes) * opt.chout,
+                        hidden_sizes=[1024],
+                        output_size=opt.repr,
+                        activation=nn.functional.relu,
+                        dropout=0.5,
+                        activate_last=True),
+        # no norm layer here: phi is L2-normalized in the classifier, just before the kernel
     ).to(device)
-    print(phi)
 
     cls = AuthorshipAttributionClassifier(
         phi, num_authors=A.size, pad_index=pad_index, pad_length=opt.pad, device=device
diff --git a/src/model/classifiers.py b/src/model/classifiers.py
index 3e757f9..c54e2b0 100644
--- a/src/model/classifiers.py
+++ b/src/model/classifiers.py
@@ -1,22 +1,20 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from sklearn.metrics import accuracy_score, f1_score
 from tqdm import tqdm
 import math
 from sklearn.model_selection import train_test_split
 from model.early_stop import EarlyStop
-from model.transformations import FFProjection
+from model.layers import FFProjection
 
 
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
         self.projector = projector.to(device)
-        #self.ff = FFProjection(input_size=projector.space_dimensions(),
-        #                       hidden_sizes=[1024],
-        #                       output_size=num_authors).to(device)
-        self.ff = FFProjection(input_size=projector.space_dimensions(),
+        self.ff = FFProjection(input_size=projector.output_size,
                                hidden_sizes=[],
                                output_size=num_authors).to(device)
         self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False, device=device)
@@ -55,6 +53,8 @@ class AuthorshipAttributionClassifier(nn.Module):
                 loss_attr_value = loss_attr.item()
 
                 if alpha < 1:
+                    phi = F.normalize(phi)
+
                     # todo: optimize (only upper diagonal)
                     kernel = torch.matmul(phi, phi.T)
                     ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
@@ -76,7 +76,8 @@ class AuthorshipAttributionClassifier(nn.Module):
                     f'loss={tr_loss:.5f} '
                     f'attr-loss={np.mean(attr_losses):.5f} '
                     f'sav-loss={np.mean(sav_losses):.5f} '
-                    f'val_loss={val_loss:.5f}'
+                    f'val_loss={val_loss:.5f} '
+                    f'patience={early_stop.patience}/{early_stop.patience_limit}'
                 )
 
             # validation
diff --git a/src/model/transformations.py b/src/model/layers.py
similarity index 78%
rename from src/model/transformations.py
rename to src/model/layers.py
index 6d0911e..66ba056 100644
--- a/src/model/transformations.py
+++ b/src/model/layers.py
@@ -4,24 +4,38 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 
+class Phi(nn.Module):
+    """Chain a CNN encoder, a feed-forward projection and an optional norm.
+
+    `norm` may be omitted (default None); the projection output is then
+    returned as-is and any normalization is left to the caller.
+    """
+
+    def __init__(self, cnn, ff, norm=None):
+        super(Phi, self).__init__()
+        self.cnn = cnn
+        self.ff = ff
+        self.norm = norm
+        self.output_size = self.ff.output_size
+
+    def forward(self, x):
+        x = self.cnn(x)
+        x = self.ff(x)
+        if self.norm is not None:
+            x = self.norm(x)
+        return x
+
+
 class CNNProjection(nn.Module):
 
-    def __init__(self, vocabulary_size, embedding_dim, out_size, channels_out, kernel_sizes, dropout=0.5):
+    def __init__(self, vocabulary_size, embedding_dim, channels_out, kernel_sizes):
         super(CNNProjection, self).__init__()
         channels_in = 1
         self.embed = nn.Embedding(vocabulary_size, embedding_dim)
         self.convs1 = nn.ModuleList(
             [nn.Conv2d(channels_in, channels_out, (K, embedding_dim)) for K in kernel_sizes]
         )
-        self.dropout = nn.Dropout(dropout)
-        #self.fc1 = nn.Linear(len(kernel_sizes) * channels_out, out_size)
-        self.fc = FFProjection(input_size=len(kernel_sizes) * channels_out,
-                               hidden_sizes=[1024],
-                               output_size=out_size,
-                               activation=nn.functional.relu,
-                               dropout=dropout,
-                               activate_last=True)
-        self.output_size = out_size
+        self.output_size = len(kernel_sizes) * channels_out
 
     def convolve(self, x):
         x = x.unsqueeze(1)  # (N, Ci, W, D)
@@ -34,20 +48,25 @@ class CNNProjection(nn.Module):
         x = F.max_pool1d(x, x.size(2)).squeeze(2)
         return x
 
-    def l2norm(self, x):
-        norm = x.norm(p=2, dim=1, keepdim=True)
-        x = x.div(norm.expand_as(x))
-        return x
-
     def forward(self, x):
         x = self.embed(x)  # (N, W, D)
         x = self.convolve(x)  # (N, len(Ks)*Co]
-        x = self.fc(x)
-        x = self.l2norm(x)
         return x
 
-    def space_dimensions(self):
-        return self.output_size
+
+class L2Norm(nn.Module):
+    """Scale vectors along `dim` to unit p-norm, guarding against zero norms."""
+
+    def __init__(self, p=2, dim=-1, eps=1e-12):
+        super(L2Norm, self).__init__()
+        self.p = p
+        self.dim = dim
+        self.eps = eps
+
+    def forward(self, x):
+        # F.normalize divides by max(norm, eps), so all-zero rows (possible
+        # after a ReLU) come out as zeros instead of NaN.
+        return F.normalize(x, p=self.p, dim=self.dim, eps=self.eps)
 
 
 class FFProjection(nn.Module):
@@ -61,16 +80,18 @@ class FFProjection(nn.Module):
         self.activation = activation
         self.dropout = nn.Dropout(p=dropout)
         self.activate_last = activate_last
+        self.output_size = output_size
 
     def forward(self, x):
         last_layer_idx = len(self.ff)-1
-        for i,linear in enumerate(self.ff):
+        for i, linear in enumerate(self.ff):
             x = linear(x)
             if i < last_layer_idx or self.activate_last:
                 x = self.dropout(self.activation(x))
         return x
 
 
+# deprecated
 class RNNProjection(nn.Module):
     def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()