phi l2 normalization outside the model, just before the KTA
parent c32d9da567 · commit ced842a219

src/main.py (24 changed lines)
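The change: the projector phi no longer L2-normalizes its output internally; normalization is applied explicitly in the training loop, only where the kernel-target-alignment (KTA) term is computed. A new Phi module composes CNNProjection with an FFProjection head, and an L2Norm module is added but left commented out wherever it would be used. Besides src/main.py, the hunks below edit the attribution classifier and the layers module (model/classifiers.py and model/layers.py, judging by the import paths). A minimal, self-contained sketch of where the normalization now sits, with an illustrative alignment formula rather than the repository's exact loss:

import torch
import torch.nn.functional as F

def kta_term(phi, y):
    # phi: (N, D) unnormalized document embeddings, y: (N,) integer author labels.
    phi = F.normalize(phi)                               # row-wise L2 normalization, outside the model
    kernel = torch.matmul(phi, phi.T)                    # cosine-similarity kernel
    ideal = (y.unsqueeze(0) == y.unsqueeze(1)).float()   # 1 where the two documents share an author
    # Standard (uncentered) kernel-target alignment; the exact loss computed downstream
    # of the normalization is not shown in the diff, so this is only the usual textbook form.
    return (kernel * ideal).sum() / (kernel.norm() * ideal.norm())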
@@ -9,7 +9,8 @@ from model.classifiers import AuthorshipAttributionClassifier, SameAuthorClassif
 from data.fetch_victorian import Victorian
 from evaluation import evaluation
 import torch
-from model.transformations import CNNProjection
+import torch.nn as nn
+from model.layers import *
 from util import create_path_if_not_exists
 import os
 import sys
@@ -68,15 +69,20 @@ def main(opt):
 
     # attribution
     print('Attribution')
-    phi = CNNProjection(
-        vocabulary_size=index.vocabulary_size(),
-        embedding_dim=opt.hidden,
-        out_size=opt.repr,
-        channels_out=opt.chout,
-        kernel_sizes=opt.kernelsizes,
-        dropout=0.5
+    phi = Phi(
+        cnn=CNNProjection(
+            vocabulary_size=index.vocabulary_size(),
+            embedding_dim=opt.hidden,
+            channels_out=opt.chout,
+            kernel_sizes=opt.kernelsizes),
+        ff=FFProjection(input_size=len(opt.kernelsizes) * opt.chout,
+                        hidden_sizes=[1024],
+                        output_size=opt.repr,
+                        activation=nn.functional.relu,
+                        dropout=0.5,
+                        activate_last=True),
+        #norm=L2Norm()
     ).to(device)
-    print(phi)
 
     cls = AuthorshipAttributionClassifier(
         phi, num_authors=A.size, pad_index=pad_index, pad_length=opt.pad, device=device
@@ -1,22 +1,20 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from sklearn.metrics import accuracy_score, f1_score
 from tqdm import tqdm
 import math
 from sklearn.model_selection import train_test_split
 from model.early_stop import EarlyStop
-from model.transformations import FFProjection
+from model.layers import FFProjection
 
 
 class AuthorshipAttributionClassifier(nn.Module):
     def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
         super(AuthorshipAttributionClassifier, self).__init__()
         self.projector = projector.to(device)
-        #self.ff = FFProjection(input_size=projector.space_dimensions(),
-        #                       hidden_sizes=[1024],
-        #                       output_size=num_authors).to(device)
-        self.ff = FFProjection(input_size=projector.space_dimensions(),
+        self.ff = FFProjection(input_size=projector.output_size,
                                hidden_sizes=[],
                                output_size=num_authors).to(device)
         self.padder = Padding(pad_index=pad_index, max_length=pad_length, dynamic=True, pad_at_end=False, device=device)
@@ -55,6 +53,8 @@ class AuthorshipAttributionClassifier(nn.Module):
                 loss_attr_value = loss_attr.item()
 
                 if alpha < 1:
+                    phi = F.normalize(phi)
+
                     # todo: optimize (only upper diagonal)
                     kernel = torch.matmul(phi, phi.T)
                     ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
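The ideal-kernel expression in the hunk above deserves a note: np.outer(1 + yi, 1 / (yi + 1)) has entry (1 + yi[i]) / (1 + yi[j]), which equals 1 exactly when the two labels coincide, so the comparison yields a same-author indicator matrix. A small check with made-up labels:

import numpy as np

yi = np.array([0, 1, 1, 2])                        # illustrative author labels
ideal = 1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)
print(ideal)
# [[1 0 0 0]
#  [0 1 1 0]
#  [0 1 1 0]
#  [0 0 0 1]]
# Note: this relies on exact floating-point equality of (1+a)*(1/(1+a)); it holds for
# small integer labels, whereas an explicit comparison such as yi[:, None] == yi[None, :]
# would avoid the rounding concern altogether.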
@@ -76,7 +76,8 @@ class AuthorshipAttributionClassifier(nn.Module):
                 f'loss={tr_loss:.5f} '
                 f'attr-loss={np.mean(attr_losses):.5f} '
                 f'sav-loss={np.mean(sav_losses):.5f} '
-                f'val_loss={val_loss:.5f}'
+                f'val_loss={val_loss:.5f} '
+                f'patience={early_stop.patience}/{early_stop.patience_limit}'
             )
 
             # validation
@@ -4,24 +4,32 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 
+class Phi(nn.Module):
+
+    def __init__(self, cnn, ff, norm):
+        super(Phi, self).__init__()
+        self.cnn = cnn
+        self.ff = ff
+        #self.norm = norm
+        self.output_size = self.ff.output_size
+
+    def forward(self, x):
+        x = self.cnn(x)
+        x = self.ff(x)
+        #x = self.norm(x)
+        return x
+
+
 class CNNProjection(nn.Module):
 
-    def __init__(self, vocabulary_size, embedding_dim, out_size, channels_out, kernel_sizes, dropout=0.5):
+    def __init__(self, vocabulary_size, embedding_dim, channels_out, kernel_sizes):
         super(CNNProjection, self).__init__()
         channels_in = 1
         self.embed = nn.Embedding(vocabulary_size, embedding_dim)
         self.convs1 = nn.ModuleList(
             [nn.Conv2d(channels_in, channels_out, (K, embedding_dim)) for K in kernel_sizes]
         )
-        self.dropout = nn.Dropout(dropout)
-        #self.fc1 = nn.Linear(len(kernel_sizes) * channels_out, out_size)
-        self.fc = FFProjection(input_size=len(kernel_sizes) * channels_out,
-                               hidden_sizes=[1024],
-                               output_size=out_size,
-                               activation=nn.functional.relu,
-                               dropout=dropout,
-                               activate_last=True)
-        self.output_size = out_size
+        self.output_size = len(kernel_sizes) * channels_out
 
     def convolve(self, x):
         x = x.unsqueeze(1)  # (N, Ci, W, D)
@@ -34,20 +42,22 @@ class CNNProjection(nn.Module):
         x = F.max_pool1d(x, x.size(2)).squeeze(2)
         return x
 
-    def l2norm(self, x):
-        norm = x.norm(p=2, dim=1, keepdim=True)
-        x = x.div(norm.expand_as(x))
-        return x
 
     def forward(self, x):
         x = self.embed(x)  # (N, W, D)
         x = self.convolve(x)  # (N, len(Ks)*Co]
-        x = self.fc(x)
-        x = self.l2norm(x)
         return x
 
-    def space_dimensions(self):
-        return self.output_size
+class L2Norm(nn.Module):
+    def __init__(self, p=2, dim=-1):
+        super(L2Norm, self).__init__()
+        self.p=p
+        self.dim=dim
+
+    def forward(self, x):
+        norm = x.norm(p=self.p, dim=self.dim, keepdim=True)
+        x = x.div(norm.expand_as(x))
+        return x
 
 
 class FFProjection(nn.Module):
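The L2Norm module introduced above reproduces the l2norm helper removed from CNNProjection; with its defaults (p=2, dim=-1) it performs the same row-wise scaling as torch.nn.functional.normalize, which is what the training loop now calls directly. A quick sanity check, assuming the class is importable as model.layers.L2Norm:

import torch
import torch.nn.functional as F
from model.layers import L2Norm   # assumed import path, matching the imports used elsewhere in the repo

x = torch.randn(4, 16)
assert torch.allclose(L2Norm()(x), F.normalize(x, p=2, dim=-1))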
@@ -61,16 +71,18 @@ class FFProjection(nn.Module):
         self.activation = activation
         self.dropout = nn.Dropout(p=dropout)
         self.activate_last = activate_last
+        self.output_size = output_size
 
     def forward(self, x):
         last_layer_idx = len(self.ff)-1
-        for i,linear in enumerate(self.ff):
+        for i, linear in enumerate(self.ff):
             x = linear(x)
             if i < last_layer_idx or self.activate_last:
                 x = self.dropout(self.activation(x))
         return x
 
 
+# deprecated
 class RNNProjection(nn.Module):
     def __init__(self, vocab_size, hidden_size, output_size, device='cpu'):
         super(RNNProjection, self).__init__()
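Putting the pieces together: the projector built in main.py is now a Phi wrapper around CNNProjection (embedding, convolutions, max-pooling) and FFProjection (the dense head), and it exposes output_size so the classifier no longer needs the removed space_dimensions() accessor. A hedged usage sketch with made-up hyper-parameters:

import torch
import torch.nn.functional as F
from model.layers import Phi, CNNProjection, FFProjection   # assumed import path

cnn = CNNProjection(vocabulary_size=5000, embedding_dim=32, channels_out=64, kernel_sizes=[3, 5])
ff = FFProjection(input_size=cnn.output_size,    # 2 kernel sizes * 64 channels = 128
                  hidden_sizes=[1024],
                  output_size=256,
                  activation=F.relu,
                  dropout=0.5,
                  activate_last=True)
phi = Phi(cnn=cnn, ff=ff, norm=None)             # norm is unused for now (commented out inside Phi)
x = torch.randint(0, 5000, (8, 100))             # a batch of 8 padded documents, 100 token ids each
print(phi(x).shape, phi.output_size)             # torch.Size([8, 256]) 256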