kernel loss based on BCE
parent be83411e25
commit acb38d4aae
@@ -81,7 +81,6 @@ def main(opt):
activation=nn.functional.relu,
dropout=0.5,
activate_last=True),
#norm=L2Norm()
).to(device)

cls = AuthorshipAttributionClassifier(
@@ -27,6 +27,7 @@ class AuthorshipAttributionClassifier(nn.Module):
#batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=X.shape[0]//batch_size)
batcher_val = Batch(batch_size=batch_size, n_epochs=epochs, shuffle=False)
criterion = torch.nn.CrossEntropyLoss().to(self.device)
savcriterion = torch.nn.BCEWithLogitsLoss().to(self.device)
optim = torch.optim.Adam(self.parameters(), lr=lr)

X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
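The added savcriterion applies BCEWithLogitsLoss to raw pair scores, which fuses the sigmoid and the binary cross-entropy in one numerically stable op. A quick sanity check of that equivalence (toy tensors, not part of the commit):

import torch
import torch.nn.functional as F

scores = torch.randn(8)                      # raw pair scores (logits)
labels = torch.randint(0, 2, (8,)).float()   # 1 = same author, 0 = different
stable = F.binary_cross_entropy_with_logits(scores, labels)
naive = F.binary_cross_entropy(torch.sigmoid(scores), labels)
assert torch.allclose(stable, naive, atol=1e-6)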
@@ -53,15 +54,25 @@ class AuthorshipAttributionClassifier(nn.Module):
loss_attr_value = loss_attr.item()

if alpha < 1:
    phi = F.normalize(phi)

    # todo: optimize (only upper diagonal)
    kernel = torch.matmul(phi, phi.T)
    ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
    # choose balanced number of positive (same author) and negative (different authors)
    idx1, idx2, sav_labels = choose_sav_pairs(yi, npairs=batch_size)

    phi1 = phi[idx1]
    phi2 = phi[idx2]
    cross = torch.bmm(phi1.unsqueeze(1), phi2.unsqueeze(2).permute(0,1,2)).squeeze()
    loss_sav = savcriterion(cross.unsqueeze(0), torch.as_tensor(sav_labels).float().unsqueeze(0).to(self.device))
    loss_sav_value = loss_sav.item()

    # add a cross-entropy based criterion (instead of KTA -- let's see how it works)

    ## todo: optimize (only upper diagonal)
    #kernel = torch.matmul(phi, phi.T)
    #ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
    # todo: maybe the KALoss should take into consideration the balance (it is more likely to have
    # a pair of negative examples than positives)
    loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
    loss_sav_value = loss_sav.item()
    #loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
    #loss_sav_value = loss_sav.item()

loss = loss_attr*alpha + loss_sav*(1.-alpha)
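The new SAV branch scores each sampled pair with the dot product of its two L2-normalized embeddings and feeds the scores to savcriterion; the bmm on unsqueezed views (the .permute(0,1,2) is an identity permutation) is just a batched row-wise dot product. A minimal sketch of the same computation (the name sav_bce_loss is illustrative, not from the repository):

import torch
import torch.nn.functional as F

def sav_bce_loss(phi, idx1, idx2, sav_labels):
    # row-wise dot products of normalized embeddings give cosine-like pair scores
    phi = F.normalize(phi, p=2, dim=-1)
    scores = (phi[idx1] * phi[idx2]).sum(dim=-1)   # same values as the bmm construction
    labels = torch.as_tensor(sav_labels, dtype=torch.float, device=scores.device)
    # BCE-with-logits on the raw scores, as the diff does via savcriterion
    return F.binary_cross_entropy_with_logits(scores, labels)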
@@ -77,8 +88,7 @@ class AuthorshipAttributionClassifier(nn.Module):
f'attr-loss={np.mean(attr_losses):.5f} '
f'sav-loss={np.mean(sav_losses):.5f} '
f'val_loss={val_loss:.5f} '
f'patience={early_stop.patience}/{early_stop.patience_limit}'
)
f'patience={early_stop.patience}/{early_stop.patience_limit}')

# validation
self.eval()
@@ -126,6 +136,38 @@ class AuthorshipAttributionClassifier(nn.Module):
return self.ff(phi)


def choose_sav_pairs(y, npairs):
    n = len(y)
    y = y+1  # reindex from [0..n_classes-1] to [1..n_classes] for convenience
    same_author = (np.outer(y, 1/y) == 1)
    triu = np.triu_indices(n, k=1)
    same_author_nodup = same_author[triu]
    idxi, idxj = triu

    posi, negi = idxi[same_author_nodup], idxi[same_author_nodup == False]
    posj, negj = idxj[same_author_nodup], idxj[same_author_nodup == False]
    num_pos = same_author_nodup.sum()
    num_neg = len(same_author_nodup)-num_pos  # == len(negj)

    # balanced:
    pos_take = np.random.choice(np.arange(num_pos), npairs//2, replace=num_pos < npairs//2)
    posi, posj = posi[pos_take], posj[pos_take]

    neg_take = np.random.choice(np.arange(num_neg), npairs//2, replace=num_neg < npairs//2)
    negi, negj = negi[neg_take], negj[neg_take]

    idx1 = np.concatenate([posi, negi])
    idx2 = np.concatenate([posj, negj])
    savlabels = np.array([1]*len(posi) + [0]*len(negi))

    print(f'generated {len(posi)} pos and {len(negi)} neg pairs')
    return idx1, idx2, savlabels


class SameAuthorClassifier(nn.Module):
    def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
        super(SameAuthorClassifier, self).__init__()
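A quick usage sketch for the choose_sav_pairs helper added above (toy labels only; assumes the function from this hunk is in scope):

import numpy as np

yi = np.array([0, 0, 1, 1, 2, 2])                  # toy author labels for 6 documents
idx1, idx2, savlabels = choose_sav_pairs(yi, npairs=4)

# each (idx1[k], idx2[k]) indexes one document pair; savlabels[k] is 1 for a
# same-author pair and 0 otherwise, drawn in (roughly) equal proportions
assert len(idx1) == len(idx2) == len(savlabels) == 4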
@@ -255,11 +297,11 @@ class FullAuthorClassifier(nn.Module):
return np.concatenate(predictions)


def KernelAlignmentLoss(K, Y):
    n_el = K.shape[0]*K.shape[1]
    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
    loss = loss / n_el  # this is in order to factor out the accumulation which is only due to the size
    return loss
#def KernelAlignmentLoss(K, Y):
#    n_el = K.shape[0]*K.shape[1]
#    loss = torch.norm(K - Y, p='fro')  # in Nello's paper this is different
#    loss = loss / n_el  # this is in order to factor out the accumulation which is only due to the size
#    return loss
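For reference, the kernel-alignment term that the BCE loss replaces penalizes the Frobenius distance between the batch kernel K = phi @ phi.T and the ideal same-author kernel, averaged over its entries. A toy check of KernelAlignmentLoss (assumes the uncommented definition above is in scope; the labels [a, a, b, b] are illustrative):

import torch
import torch.nn.functional as F

phi = F.normalize(torch.randn(4, 8), dim=-1)      # 4 normalized embeddings
K = phi @ phi.T                                    # batch kernel
Y = torch.tensor([[1., 1., 0., 0.],
                  [1., 1., 0., 0.],
                  [0., 0., 1., 1.],
                  [0., 0., 1., 1.]])               # ideal kernel for labels [a, a, b, b]
loss = KernelAlignmentLoss(K, Y)                   # ||K - Y||_F / 16, a scalar tensor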
@@ -6,17 +6,16 @@ import torch.nn.functional as F

class Phi(nn.Module):

    def __init__(self, cnn, ff, norm=None):
    def __init__(self, cnn, ff):
        super(Phi, self).__init__()
        self.cnn = cnn
        self.ff = ff
        #self.norm = norm
        self.output_size = self.ff.output_size

    def forward(self, x):
        x = self.cnn(x)
        x = self.ff(x)
        #x = self.norm(x)
        x = F.normalize(x, p=2, dim=-1)
        return x
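Phi now hard-codes the normalization with F.normalize in place of the configurable norm module; the L2Norm layer dropped in the next hunk computes the same thing. A quick equivalence check (toy tensor):

import torch
import torch.nn.functional as F

x = torch.randn(3, 5)
norm = x.norm(p=2, dim=-1, keepdim=True)           # what L2Norm.forward computes
assert torch.allclose(x.div(norm.expand_as(x)), F.normalize(x, p=2, dim=-1))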
@@ -48,18 +47,6 @@ class CNNProjection(nn.Module):
return x


class L2Norm(nn.Module):
    def __init__(self, p=2, dim=-1):
        super(L2Norm, self).__init__()
        self.p = p
        self.dim = dim

    def forward(self, x):
        norm = x.norm(p=self.p, dim=self.dim, keepdim=True)
        x = x.div(norm.expand_as(x))
        return x


class FFProjection(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activation=nn.functional.relu, dropout=0.5,
                 activate_last=False):