From 908fc2d6daedd1c5afffbfaa84e3eb824fbb7e0f Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Mon, 1 Feb 2021 11:25:13 +0100
Subject: [PATCH] changing remote

---
 TODO.txt                 | 42 +++++++++++++++++++----------------------
 src/model/classifiers.py |  2 +-
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/TODO.txt b/TODO.txt
index 7521cbd..86864fa 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,30 +1,26 @@
 Things to clarify:
-maybe I have to review the validation of the sav-loss; since it is batched, it might always be checking the same
-  submatrices for alignment, and those may be mostly positive or mostly near an identity?
-maybe the sav-loss is something which may make sense to impose, as a regularization, across many last layers, and not
-  only the last one?
+
+about the network:
+==================
+remove the .to() calls inside the Module and use the self.on_cpu instead
 process datasets and leave it as a generic parameter
 padding could start at any random point between [0, length_i-pad_length]
   - in training, pad to the shortest
   - in test, pad to the largest
-save and restore checkpoints
-should the phi(x) be normalized? if so:
-  better at the last step of phi?
-  better outside phi, prior to the gram matrix computation?
-should the single-label classifier have some sort of non-linearity from the phi(x) to the labels?
+
+
+about the loss and the KTA:
+===========================
+not clear whether we should define the loss as in "On kernel target alignment", i.e., a numerator with <K,Y>_F (and
+  change sign to minimize) or as the |K-Y|_F norm. What about the denominator (now, the normalization factor is n**2)?
+maybe the sav-loss is something which may make sense to impose, as a regularization, across many last layers, and not
+  only the last one?
+
+are the contributions of the two losses comparable? or does one contribute far more than the other?
+is the TwoClassBatch the best way?
+maybe I have to review the validation of the sav-loss; since it is batched, it might always be checking the same
+  submatrices for alignment, and those may be mostly positive or mostly near an identity?
 SAV: how should the range of k(xi,xj) be interpreted? how to decide the value threshold for returning -1 or +1? I guess
   the best thing to do is to learn a simple threshold, one feed-forward 1-to-1
-is the TwoClassBatch the best way?
-are the contributions of the two losses comparable? or does one contribute far more than the other?
-what is the best representation for inputs? char-based? ngram-based? word-based? or a multichannel one?
-  I think this is irrelevant for the paper
-not clear whether the single-label classifier should work out a feed-forward layer on top of the intermediate representation, or should it
-  instead work directly on the representations with one simple linear projection; not clear either whether the kernel
-  should be computed on any further elaboration from the intermediate representation... thing is, that the KTA
-  is imposing unimodality (documents from the same author should point in a single direction) while working out another
-  representation for the single-label classifier could instead relax this and attribute to the same author vectors that
-  come from a multimodal distribution. No... This "unimodality" should exist anyway in the last layer. Indeed I start
-  thinking that the optimum for any classifier should already impose something similar to the KTA criteria in the
-  last layer... Is this redundant?
-not clear whether we should define the loss as in "On kernel target alignment", i.e., a numerator with <K,Y>_F (and
-  change sign to minimize) or as the |K-Y|_F norm. What about the denominator (now, the normalization factor is n**2)?
\ No newline at end of file
+
+
diff --git a/src/model/classifiers.py b/src/model/classifiers.py
index e69a919..118ad0a 100644
--- a/src/model/classifiers.py
+++ b/src/model/classifiers.py
@@ -33,6 +33,7 @@ class AuthorshipAttributionClassifier(nn.Module):
         X, Xval, y, yval = train_test_split(X, y, test_size=val_prop, stratify=y)
 
         with open(log, 'wt') as foo:
+            print()
             foo.write('epoch\ttr-loss\tval-loss\tval-acc\tval-Mf1\tval-mf1\n')
             tr_loss, val_loss = -1, -1
             pbar = tqdm(range(1, batcher.n_epochs+1))
@@ -160,7 +161,6 @@ def choose_sav_pairs(y, npairs):
     idx2 = np.concatenate([posj, negj])
     savlabels = np.array([1]*len(posi) + [0]*len(negi))
 
-    print(f'generated {len(posi)} pos and {len(negi)}')
     return idx1, idx2, savlabels
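On the open TODO question about how to define the KTA loss: the sketch below is plain PyTorch, not the repository's code; the function names are hypothetical, and it assumes Y is the ideal kernel (1 for same-author pairs, 0 otherwise). It contrasts the two candidate formulations, the alignment of "On kernel target alignment" with the sign flipped, and the Frobenius distance with the n**2 normalization mentioned in the TODO.

import torch

def kta_loss(K, Y):
    # Alignment as in "On kernel target alignment": A(K,Y) = <K,Y>_F / (|K|_F |Y|_F);
    # the sign is flipped so that maximizing alignment becomes a minimization.
    num = (K * Y).sum()
    den = torch.norm(K, p='fro') * torch.norm(Y, p='fro')
    return -num / den

def frobenius_loss(K, Y):
    # Alternative: squared Frobenius distance |K - Y|_F**2 divided by n**2,
    # i.e., the mean squared error over the n*n entries of the gram matrix.
    n = K.shape[0]
    return ((K - Y) ** 2).sum() / (n ** 2)

One difference worth noting: the alignment form is invariant to rescaling K, so the choice of denominator matters less there than in the Frobenius-distance form, where the n**2 factor directly sets the scale of the loss.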
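On the SAV question of how to turn k(xi,xj) into a -1/+1 decision: a minimal sketch of the "learn a simple threshold, one feed-forward 1-to-1" idea, assuming a single nn.Linear(1, 1) applied to the kernel score; the class name is hypothetical, not something in the repository.

import torch
import torch.nn as nn

class SAVThreshold(nn.Module):
    # A 1-to-1 feed-forward layer over the kernel score k(xi, xj):
    # sigmoid(w * k + b), so the learned decision boundary sits at k = -b / w.
    def __init__(self):
        super().__init__()
        self.ff = nn.Linear(1, 1)

    def forward(self, k_values):
        # k_values: tensor of shape (n_pairs,) holding kernel scores
        logits = self.ff(k_values.unsqueeze(-1)).squeeze(-1)
        return torch.sigmoid(logits)  # probability that the pair is same-author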
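On the padding note ("padding could start at any random point between [0, length_i-pad_length]"): a sketch of one way to read that, over a plain list of token ids; the function name and the pad index 0 are assumptions, not taken from the repository.

import numpy as np

def random_window(tokens, pad_length, pad_index=0):
    # If the document is shorter than pad_length, pad it to the right;
    # otherwise take a window of pad_length tokens starting at a random
    # offset in [0, len(tokens) - pad_length].
    if len(tokens) <= pad_length:
        return tokens + [pad_index] * (pad_length - len(tokens))
    start = np.random.randint(0, len(tokens) - pad_length + 1)
    return tokens[start:start + pad_length]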