kernel loss based on BCE

Alejandro Moreo Fernandez 2020-07-23 14:29:00 +02:00
parent be83411e25
commit acb38d4aae
3 changed files with 57 additions and 29 deletions

View File

@@ -81,7 +81,6 @@ def main(opt):
activation=nn.functional.relu,
dropout=0.5,
activate_last=True),
#norm=L2Norm()
).to(device)
cls = AuthorshipAttributionClassifier(

View File

@@ -27,6 +27,7 @@ class AuthorshipAttributionClassifier(nn.Module):
#batcher = TwoClassBatch(batch_size=batch_size, n_epochs=epochs, steps_per_epoch=X.shape[0]//batch_size)
batcher_val = Batch(batch_size=batch_size, n_epochs=epochs, shuffle=False)
criterion = torch.nn.CrossEntropyLoss().to(self.device)
savcriterion = torch.nn.BCEWithLogitsLoss().to(self.device)
optim = torch.optim.Adam(self.parameters(), lr=lr)
X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
@@ -53,15 +54,25 @@ class AuthorshipAttributionClassifier(nn.Module):
loss_attr_value = loss_attr.item()
if alpha < 1:
phi = F.normalize(phi)
# todo: optimize (only upper diagonal)
kernel = torch.matmul(phi, phi.T)
ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
# choose a balanced number of positive (same author) and negative (different authors) pairs
idx1, idx2, sav_labels = choose_sav_pairs(yi, npairs=batch_size)
phi1 = phi[idx1]
phi2 = phi[idx2]
cross = torch.bmm(phi1.unsqueeze(1), phi2.unsqueeze(2)).squeeze()
loss_sav = savcriterion(cross.unsqueeze(0), torch.as_tensor(sav_labels).float().unsqueeze(0).to(self.device))
loss_sav_value = loss_sav.item()
# add a cross-entropy based criterion (instead of KTA -- let's see how it works)
## todo: optimize (only upper diagonal)
#kernel = torch.matmul(phi, phi.T)
#ideal_kernel = torch.as_tensor(1 * (np.outer(1 + yi, 1 / (yi + 1)) == 1)).to(self.device)
# todo: maybe the KALoss should take the class balance into consideration (negative pairs are
# far more likely than positive ones)
loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
loss_sav_value = loss_sav.item()
#loss_sav = KernelAlignmentLoss(kernel, ideal_kernel)
#loss_sav_value = loss_sav.item()
loss = loss_attr*alpha + loss_sav*(1.-alpha)
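A minimal standalone sketch of the new SAV criterion, assuming toy tensors and made-up pair indices in place of phi and the output of choose_sav_pairs; the pairwise scores fed to BCEWithLogitsLoss are just dot products between the two selected embedding rows:

# illustrative sketch only, not part of the commit
import torch
import torch.nn.functional as F

phi = F.normalize(torch.randn(8, 32), p=2, dim=-1)   # toy batch of 8 L2-normalized embeddings
idx1 = torch.tensor([0, 1, 2, 3])                     # hypothetical pair indices
idx2 = torch.tensor([4, 5, 6, 7])
sav_labels = torch.tensor([1., 0., 1., 0.])           # 1 = same author, 0 = different authors

scores = (phi[idx1] * phi[idx2]).sum(dim=-1)          # row-wise dot products, equivalent to the bmm above
loss_sav = torch.nn.BCEWithLogitsLoss()(scores, sav_labels)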
@@ -77,8 +88,7 @@ class AuthorshipAttributionClassifier(nn.Module):
f'attr-loss={np.mean(attr_losses):.5f} '
f'sav-loss={np.mean(sav_losses):.5f} '
f'val_loss={val_loss:.5f} '
f'patience={early_stop.patience}/{early_stop.patience_limit}'
)
f'patience={early_stop.patience}/{early_stop.patience_limit}')
# validation
self.eval()
@@ -126,6 +136,38 @@ class AuthorshipAttributionClassifier(nn.Module):
return self.ff(phi)
def choose_sav_pairs(y, npairs):
n = len(y)
y = y+1 # reindex from [0..n_classes-1] to [1..n_classes] for convenience
same_author = (np.outer(y, 1/y) == 1)
triu = np.triu_indices(n, k=1)
same_author_nodup = same_author[triu]
idxi, idxj = triu
posi, negi = idxi[same_author_nodup], idxi[~same_author_nodup]
posj, negj = idxj[same_author_nodup], idxj[~same_author_nodup]
num_pos = same_author_nodup.sum()
num_neg = len(same_author_nodup)-num_pos # == len(negj)
# balanced:
pos_take = np.random.choice(np.arange(num_pos), npairs//2, replace=num_pos < npairs//2)
posi, posj = posi[pos_take], posj[pos_take]
neg_take = np.random.choice(np.arange(num_neg), npairs//2, replace=num_neg < npairs//2)
negi, negj = negi[neg_take], negj[neg_take]
idx1 = np.concatenate([posi, negi])
idx2 = np.concatenate([posj, negj])
savlabels = np.array([1]*len(posi) + [0]*len(negi))
print(f'generated {len(posi)} pos and {len(negi)} neg pairs')
return idx1, idx2, savlabels
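A quick usage sketch of choose_sav_pairs with a hypothetical label vector: idx1[k] and idx2[k] index a document pair, and savlabels[k] is 1 exactly when the two documents share an author:

# illustrative usage only
import numpy as np

y_toy = np.array([0, 0, 0, 1, 1, 2])                  # hypothetical author labels
idx1, idx2, labels = choose_sav_pairs(y_toy, npairs=4)
assert all(labels == (y_toy[idx1] == y_toy[idx2]))     # positives are same-author pairs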
class SameAuthorClassifier(nn.Module):
def __init__(self, projector, num_authors, pad_index, pad_length=500, device='cpu'):
super(SameAuthorClassifier, self).__init__()
@@ -255,11 +297,11 @@ class FullAuthorClassifier(nn.Module):
return np.concatenate(predictions)
def KernelAlignmentLoss(K, Y):
n_el = K.shape[0]*K.shape[1]
loss = torch.norm(K - Y, p='fro') # in Nello's paper this is different
loss = loss / n_el # this is in order to factor out the accumulation which is only due to the size
return loss
#def KernelAlignmentLoss(K, Y):
# n_el = K.shape[0]*K.shape[1]
# loss = torch.norm(K - Y, p='fro') # in Nello's paper this is different
# loss = loss / n_el # this is in order to factor out the accumulation which is only due to the size
# return loss
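For reference, a toy evaluation of the retired alignment criterion (the commented-out code above applied to a 3x3 Gram matrix and its ideal counterpart); the BCE criterion introduced in this commit targets the same signal, i.e. agreement between embedding similarity and same-authorship:

# illustrative only: the retired criterion on toy inputs
import torch

K = torch.tensor([[1.0, 0.9, 0.1],
                  [0.9, 1.0, 0.2],
                  [0.1, 0.2, 1.0]])
Y = torch.tensor([[1.0, 1.0, 0.0],
                  [1.0, 1.0, 0.0],
                  [0.0, 0.0, 1.0]])
old_loss = torch.norm(K - Y, p='fro') / (K.shape[0] * K.shape[1])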

View File

@@ -6,17 +6,16 @@ import torch.nn.functional as F
class Phi(nn.Module):
def __init__(self, cnn, ff, norm=None):
def __init__(self, cnn, ff):
super(Phi, self).__init__()
self.cnn = cnn
self.ff = ff
#self.norm = norm
self.output_size = self.ff.output_size
def forward(self, x):
x = self.cnn(x)
x = self.ff(x)
#x = self.norm(x)
x = F.normalize(x, p=2, dim=-1)
return x
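Since F.normalize makes every embedding unit-length, the pairwise dot products used as SAV logits are cosine similarities bounded in [-1, 1]; a quick sanity check on a toy tensor:

# illustrative check only
import torch
import torch.nn.functional as F

x = F.normalize(torch.randn(4, 16), p=2, dim=-1)
assert torch.allclose(x.norm(p=2, dim=-1), torch.ones(4))   # unit-norm rows
sims = x @ x.T                                               # every entry lies in [-1, 1]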
@@ -48,18 +47,6 @@ class CNNProjection(nn.Module):
return x
class L2Norm(nn.Module):
def __init__(self, p=2, dim=-1):
super(L2Norm, self).__init__()
self.p=p
self.dim=dim
def forward(self, x):
norm = x.norm(p=self.p, dim=self.dim, keepdim=True)
x = x.div(norm.expand_as(x))
return x
class FFProjection(nn.Module):
def __init__(self, input_size, hidden_sizes, output_size, activation=nn.functional.relu, dropout=0.5,
activate_last=False):