reworked unsupervised (aligned) embeddings loader method and class (fastText and MUSE).

New command-line option -t/--we-type selects the aligned embeddings to use: ['MUSE', 'FastText'].
Uploaded /results/results.csv (on rcv1 ... run0.pickle), obtained on all available setups.
TODO: refactor it also as a standalone class with its own load/weighted sum/extract/reduce methods.
This commit is contained in:
andrea 2019-11-30 19:14:51 +01:00
parent 499c6018c0
commit f2083bf22a
5 changed files with 107 additions and 30 deletions

View File

@ -17,11 +17,14 @@ parser.add_option("-o", "--output", dest="output",
help="Result file", type=str, default='./results/results.csv')
parser.add_option("-e", "--mode-embed", dest="mode_embed",
help="Set the embedding to be used [none, pretrained, supervised, both]", type=str, default='none')
help="Set the embedding to be used [none, unsupervised, supervised, both]", type=str, default='none')
parser.add_option("-w", "--we-path", dest="we_path",
help="Path to the polylingual word embeddings", default='../embeddings/')
parser.add_option('-t', "--we-type", dest="we_type", help="Aligned embeddings to use [FastText, MUSE]", type=str,
default='FastText')
parser.add_option("-s", "--set_c", dest="set_c",type=float,
help="Set the C parameter", default=1)
@ -36,7 +39,7 @@ def get_learner(calibrate=False, kernel='linear'):
return SVC(kernel=kernel, probability=calibrate, cache_size=1000, C=op.set_c, random_state=1, class_weight='balanced')
def get_params(dense=False): # TODO kernel function could be useful for meta-classifier
def get_params(dense=False):
if not op.optimc:
return None
c_range = [1e4, 1e3, 1e2, 1e1, 1, 1e-1]
@ -72,30 +75,36 @@ if __name__ == '__main__':
# Embeddings and WCE config
_available_mode = ['none', 'unsupervised', 'supervised', 'both']
assert op.mode_embed in _available_mode , f'{op.mode_embed} not in {_available_mode}'
_available_type = ['MUSE', 'FastText']
assert op.mode_embed in _available_mode, f'{op.mode_embed} not in {_available_mode}'
assert op.we_type in _available_type, f'{op.we_type} not in {_available_type}'
if op.mode_embed == 'none':
config = {'unsupervised': False,
'supervised': False}
'supervised': False,
'we_type': None}
_config_id = 'None'
elif op.mode_embed == 'unsupervised':
config = {'unsupervised': True,
'supervised': False}
'supervised': False,
'we_type': op.we_type}
_config_id = 'M'
elif op.mode_embed == 'supervised':
config = {'unsupervised': False,
'supervised': True}
'supervised': True,
'we_type': None}
_config_id = 'F'
elif op.mode_embed == 'both':
config = {'unsupervised': True,
'supervised': True}
'supervised': True,
'we_type': op.we_type}
_config_id = 'M_and_F'
result_id = dataset_file + 'PolyEmbedd_andrea_' + _config_id + ('_optimC' if op.optimc else '')
print(f'### PolyEmbedd_andrea_{_config_id}\n')
classifier = AndreaCLF(op.we_path,
config,
classifier = AndreaCLF(we_path=op.we_path,
config=config,
first_tier_learner=get_learner(calibrate=True),
meta_learner=get_learner(calibrate=False, kernel='rbf'),
first_tier_parameters=get_params(dense=False),
@ -114,5 +123,5 @@ if __name__ == '__main__':
metrics.append([macrof1, microf1, macrok, microk])
print('Lang %s: macro-F1=%.3f micro-F1=%.3f' % (lang, macrof1, microf1))
results.add_row(result_id, 'PolyEmbed_andrea', 'svm', _config_id, op.optimc, op.dataset.split('/')[-1],
'not_binary', 'not_ablation', classifier.time, lang, macrof1, microf1, macrok, microk, 'nope')
'not_binary', 'not_ablation', classifier.time, lang, macrof1, microf1, macrok, microk, '')
print('Averages: MF1, mF1, MK, mK', np.mean(np.array(metrics), axis=0))

View File

@ -147,7 +147,7 @@ class FastTextWikiNews(Vectors):
url_base = 'Cant auto-download MUSE embeddings'
path = '/storage/andrea/FUNNELING/embeddings/wiki.multi.{}.vec'
_name = 'wiki.multi.{}.vec'
_name = '/embeddings/wiki.multi.{}.vec'
def __init__(self, cache, language="en", **kwargs):
url = self.url_base.format(language)
@ -157,6 +157,30 @@ class FastTextWikiNews(Vectors):
super(FastTextWikiNews, self).__init__(name, cache=cache, url=url, **kwargs)
class EmbeddingsAligned(Vectors):
    """Loader for pre-trained aligned word embeddings (MUSE or FastText).

    The vector file location is built as ``path + <type-specific name>``,
    where the type-specific name is formatted with ``lang``.

    Args:
        type: 'MUSE' selects the MUSE file layout; any other value selects
            the aligned FastText layout. (The name shadows the builtin but is
            kept so that existing callers keep working.)
        path: base directory prepended to the type-specific file name.
        lang: language code substituted into the file name template.

    Raises:
        AssertionError: if the computed vector file does not exist.
    """

    def __init__(self, type, path, lang):
        is_muse = type == 'MUSE'
        self.name = '/embeddings/wiki.multi.{}.vec' if is_muse else '/embeddings_polyFASTTEXT/wiki.{}.align.vec'
        # todo - rewrite as relative path
        self.cache_path = '/home/andreapdr/CLESA/embeddings' if is_muse else '/home/andreapdr/CLESA/embeddings_polyFASTTEXT'
        self.path = path + self.name.format(lang)
        # Check the actual vector file, not just the base directory: an
        # existing directory with a missing .vec file must be reported here.
        assert os.path.exists(self.path), f'pre-trained vectors not found in {self.path}'
        super(EmbeddingsAligned, self).__init__(self.path, cache=self.cache_path)

    def vocabulary(self):
        """Return the set of words that have a pre-trained vector."""
        return set(self.stoi.keys())

    # NOTE: there is deliberately no `dim()` method. `extract` reads `self.dim`
    # as a number, so it must be a plain attribute (presumably set by the base
    # class -- confirm against the torchtext version in use). A method with the
    # same name returning `self.dim` could only return itself or be shadowed by
    # that attribute; use the `dim` attribute directly.

    def extract(self, words):
        """Build a (len(words), dim) tensor with one row per requested word.

        Rows not selected by ``PretrainedEmbeddings.reindex`` are left as
        zeros (presumably the words missing from the pre-trained vocabulary --
        verify against ``reindex``).
        """
        source_idx, target_idx = PretrainedEmbeddings.reindex(words, self.stoi)
        extraction = torch.zeros((len(words), self.dim))
        extraction[source_idx] = self.vectors[target_idx]
        return extraction
class FastTextMUSE(PretrainedEmbeddings):
def __init__(self, path, lang, limit=None):
@ -179,12 +203,12 @@ class FastTextMUSE(PretrainedEmbeddings):
return extraction
def embedding_matrix(path, voc, lang):
def embedding_matrix(type, path, voc, lang):
vocabulary = np.asarray(list(zip(*sorted(voc.items(), key=lambda x:x[1])))[0])
print('[embedding matrix]')
print(f'# [pretrained-matrix: FastTextMUSE {lang}]')
pretrained = FastTextMUSE(path, lang)
print(f'# [pretrained-matrix: {type} {lang}]')
pretrained = EmbeddingsAligned(type, path, lang)
P = pretrained.extract(vocabulary).numpy()
del pretrained
print(f'[embedding matrix done] of shape={P.shape}\n')

View File

@ -7,8 +7,6 @@ from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from joblib import Parallel, delayed
from sklearn.feature_extraction.text import TfidfVectorizer
from data.supervised import zscores
from transformers.StandardizeTransformer import StandardizeTransformer
@ -444,7 +442,8 @@ class AndreaCLF(FunnellingPolylingualClassifier):
first_tier_parameters=None,
meta_parameters=None,
folded_projections=1,
calmode='cal', n_jobs=-1):
calmode='cal',
n_jobs=-1):
super().__init__(first_tier_learner,
meta_learner,
@ -479,9 +478,8 @@ class AndreaCLF(FunnellingPolylingualClassifier):
self.languages.append(lang)
tfidf_vectorizer.fit(lX[lang])
lX[lang] = tfidf_vectorizer.transform(lX[lang])
_sort_if_sparse(lX[lang])
self.lang_word2idx[lang] = tfidf_vectorizer.vocabulary_
self.lang_tfidf[lang] = tfidf_vectorizer # utile in fase di testing
self.lang_tfidf[lang] = tfidf_vectorizer
return self
# @override std class method
@ -517,15 +515,13 @@ class AndreaCLF(FunnellingPolylingualClassifier):
if unsupervised:
for lang in languages:
# print('Test building embedding matrix FastTextMuse ...')
_, M = embedding_matrix(self.we_path, self.lang_word2idx[lang], lang)
_, M = embedding_matrix(self.config['we_type'], self.we_path, self.lang_word2idx[lang], lang)
self.word_embeddings[lang] = M
_r[lang] = lX[lang].dot(M)
if supervised:
for lang in languages:
S = WCE_matrix(lX, ly, lang)
# S = np.squeeze(np.asarray(S)) # casting to ndarray to better visualize S while debugging
self.supervised_embeddings[lang] = S
if unsupervised:
_r[lang] = np.hstack((_r[lang], lX[lang].dot(S)))
@ -562,7 +558,7 @@ class AndreaCLF(FunnellingPolylingualClassifier):
_vertical_Zy = np.vstack([zy[lang] for lang in self.languages])
self.standardizer = StandardizeTransformer()
_vertical_Z = self.standardizer.fit_predict(_vertical_Z)
_vertical_Z = self.standardizer.fit_predict(_vertical_Z)
print('fitting the Z-space of shape={}'.format(_vertical_Z.shape))
self.model = MonolingualClassifier(base_learner=self.meta_learner, parameters=self.meta_parameters,

View File

@ -1,7 +1,55 @@
id method learner embed optimp dataset binary languages time lang macrof1 microf1 macrok microk notes
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M PolyEmbed_andrea svm M False test_datasetname not_binary not_ablation 55.56810355186462 da 0.7933333333333333 0.0 0.7933333333333333 0.0 nope
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M PolyEmbed_andrea svm M False test_datasetname not_binary not_ablation 55.56810355186462 en 0.7866666666666666 0.0 0.7927111111111111 -0.0003376325207643527 nope
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M PolyEmbed_andrea svm M False test_datasetname not_binary not_ablation 55.56810355186462 fr 0.7866666666666666 0.0 0.7930666666666667 -0.0001350530083057411 nope
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None PolyEmbed_andrea svm None False test_datasetname not_binary not_ablation 24.031760931015015 da 0.7933333333333333 0.0 0.7933333333333333 0.0 nope
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None PolyEmbed_andrea svm None False test_datasetname not_binary not_ablation 24.031760931015015 en 0.7933333333333333 0.0 0.7931111111111111 -0.00013505300830574107 nope
jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None PolyEmbed_andrea svm None False test_datasetname not_binary not_ablation 24.031760931015015 fr 0.7933333333333333 0.0 0.7933333333333333 0.0 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 it 0.5367684112761455 0.7945344129554656 0.5179685773363333 0.7651326488894972 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 pt 0.6969974938193201 0.878625134264232 0.6967392557377021 0.8466030321042095 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 sv 0.502213941379271 0.7700107543401444 0.4991078326315248 0.7207899075774371 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 es 0.5817849682843411 0.8448214916931778 0.5849433134898768 0.8202407220651875 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 en 0.5284100314545743 0.7625649913344887 0.4968119038332687 0.7152142337789349 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 da 0.4868904596668941 0.7971705872676427 0.4554442856126113 0.741227149968307 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 nl 0.5470546398570723 0.8276762402088773 0.5177281560038681 0.7850292121533595 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 fr 0.4997574965766772 0.7678434382194935 0.4836027981945328 0.7099957841328215 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 de 0.4220457399934653 0.7444316119452236 0.4256936056238835 0.7167749374918141 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 it 0.5398437760931379 0.8008933172994331 0.5146465197929204 0.7584451610463148 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 pt 0.6975279233747671 0.8779959377115775 0.6911573032014029 0.8392738059784555 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 sv 0.5179339368901748 0.7752035065748278 0.4962165022301373 0.7133720895906155 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 es 0.5745246656272296 0.8476464247215235 0.5736797442258523 0.8104027280076678 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 en 0.5265892627601801 0.761854398025736 0.4868823643967914 0.7032312369952987 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 da 0.4857267508065667 0.7955911823647295 0.449682467737542 0.7293013090493592 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 nl 0.5461000743929812 0.8304711580801409 0.5139887576564601 0.7790659402231745 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 fr 0.5015991524998897 0.7699748500677114 0.4811739320459739 0.7065159928392686 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 de 0.4141396160516795 0.743810005053057 0.4126132681585116 0.7023983497130937 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 it 0.4810224709403544 0.7617194410047762 0.453310215598049 0.6999032557458222 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 pt 0.6693663195289151 0.8619702956806105 0.6657298472047529 0.8182397742327547 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 sv 0.43107388787211537 0.7126933954416902 0.4180735239763325 0.6168407376537499 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 es 0.5087201120140917 0.8249322493224932 0.5032299168859704 0.7835086748116167 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 en 0.3822498549987095 0.6877811094452774 0.3309945723997902 0.5962925522774631 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 da 0.4517051377915163 0.7658914728682171 0.4030339299921389 0.6806166833916132 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 nl 0.4875303727964308 0.7853962600178095 0.4534046979963794 0.7270844266398626 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 fr 0.3750315407356979 0.6999393816932714 0.3628389019101708 0.6136670285424017 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 de 0.355059356514748 0.7046466085098807 0.33834564366266284 0.6299245108196094 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 it 0.4755443069888554 0.7675079985780305 0.4501140447119437 0.7023435117413848 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 pt 0.673303227450142 0.8655002733734279 0.6702445967772233 0.8193963705153853 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 sv 0.4189470089118392 0.7236711786068009 0.4198491651634073 0.6314272037990425 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 es 0.5178080058189616 0.8268359020852222 0.5104336022388637 0.782714898784318 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 en 0.4115752894185112 0.7001869158878504 0.35164720517285003 0.6091191993104883 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 da 0.4437869429842064 0.7626499739175796 0.39704879178312197 0.6717100410826179 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 nl 0.47635948919429705 0.7874471399955486 0.4589309165206792 0.7292337019755739 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 fr 0.39374621795002507 0.7063947733122155 0.3850407928528449 0.6315594797194366 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 de 0.3539890425069821 0.7095981751184418 0.3512802070446796 0.6432196317592322 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 it 0.5791455159341481 0.8060849214309596 0.6034752340075125 0.7869853576681214 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 pt 0.6403974389994276 0.8803876562101505 0.6565213830246649 0.8497743924811387 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 sv 0.5032337014290953 0.7768595041322314 0.4719549200388494 0.7364733997369779 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 es 0.5200567247634353 0.8529964145466963 0.4908726477090496 0.8285929531854332 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 en 0.512424485488998 0.7533647963642719 0.4719843960571978 0.7044441169169227 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 da 0.5861231569852233 0.8040595842200032 0.5393761149602847 0.7381233055764151 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 nl 0.6072184716496147 0.8335123523093448 0.5845309357041368 0.8020267337813639 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 fr 0.4923294612439038 0.7854697603651578 0.4713782273939219 0.7329001302478475 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 de 0.4709904181031267 0.7457793804294378 0.4465581491449931 0.7046844416244138 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 it 0.575387626645539 0.8064243448858833 0.5958411838194531 0.7790018114269683 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 pt 0.653004040098633 0.8791937747161628 0.6559210761775208 0.8482450061614855 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 sv 0.49944915222086167 0.7789179104477612 0.4604673876743342 0.727778938054739 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 es 0.5144474487169811 0.8559087767795439 0.48397711649967695 0.8222692824953204 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 en 0.5160737755179508 0.755674709562109 0.45961112517260677 0.6921096138985132 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 da 0.5875776383868945 0.8015873015873016 0.5367286265015276 0.7288571047461061 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 nl 0.6079883230969934 0.8363004776378636 0.5828217771858487 0.7968282071156207 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 fr 0.4966338770370634 0.7860696517412935 0.46250527724325174 0.7292650668002159 nope
rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 de 0.4675732669000923 0.7479187479187479 0.43767984457683634 0.69653035770654 nope

1 id method learner embed optimp dataset binary languages time lang macrof1 microf1 macrok microk notes
2 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm M None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 55.56810355186462 161.99278807640076 da it 0.7933333333333333 0.5367684112761455 0.0 0.7945344129554656 0.7933333333333333 0.5179685773363333 0.0 0.7651326488894972 nope
3 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm M None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 55.56810355186462 161.99278807640076 en pt 0.7866666666666666 0.6969974938193201 0.0 0.878625134264232 0.7927111111111111 0.6967392557377021 -0.0003376325207643527 0.8466030321042095 nope
4 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_M rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm M None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 55.56810355186462 161.99278807640076 fr sv 0.7866666666666666 0.502213941379271 0.0 0.7700107543401444 0.7930666666666667 0.4991078326315248 -0.0001350530083057411 0.7207899075774371 nope
5 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 24.031760931015015 161.99278807640076 da es 0.7933333333333333 0.5817849682843411 0.0 0.8448214916931778 0.7933333333333333 0.5849433134898768 0.0 0.8202407220651875 nope
6 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 24.031760931015015 161.99278807640076 en 0.7933333333333333 0.5284100314545743 0.0 0.7625649913344887 0.7931111111111111 0.4968119038332687 -0.00013505300830574107 0.7152142337789349 nope
7 jrc_doclist_1958-2005vs2006_all_top300_noparallel_processed_run0.picklePolyEmbedd_andrea_None rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None False True test_datasetname rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 24.031760931015015 161.99278807640076 fr da 0.7933333333333333 0.4868904596668941 0.0 0.7971705872676427 0.7933333333333333 0.4554442856126113 0.0 0.741227149968307 nope
8 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 nl 0.5470546398570723 0.8276762402088773 0.5177281560038681 0.7850292121533595 nope
9 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 fr 0.4997574965766772 0.7678434382194935 0.4836027981945328 0.7099957841328215 nope
10 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 161.99278807640076 de 0.4220457399934653 0.7444316119452236 0.4256936056238835 0.7167749374918141 nope
11 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 it 0.5398437760931379 0.8008933172994331 0.5146465197929204 0.7584451610463148 nope
12 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 pt 0.6975279233747671 0.8779959377115775 0.6911573032014029 0.8392738059784555 nope
13 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 sv 0.5179339368901748 0.7752035065748278 0.4962165022301373 0.7133720895906155 nope
14 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 es 0.5745246656272296 0.8476464247215235 0.5736797442258523 0.8104027280076678 nope
15 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 en 0.5265892627601801 0.761854398025736 0.4868823643967914 0.7032312369952987 nope
16 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 da 0.4857267508065667 0.7955911823647295 0.449682467737542 0.7293013090493592 nope
17 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 nl 0.5461000743929812 0.8304711580801409 0.5139887576564601 0.7790659402231745 nope
18 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 fr 0.5015991524998897 0.7699748500677114 0.4811739320459739 0.7065159928392686 nope
19 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 503.81587314605713 de 0.4141396160516795 0.743810005053057 0.4126132681585116 0.7023983497130937 nope
20 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 it 0.4810224709403544 0.7617194410047762 0.453310215598049 0.6999032557458222 nope
21 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 pt 0.6693663195289151 0.8619702956806105 0.6657298472047529 0.8182397742327547 nope
22 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 sv 0.43107388787211537 0.7126933954416902 0.4180735239763325 0.6168407376537499 nope
23 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 es 0.5087201120140917 0.8249322493224932 0.5032299168859704 0.7835086748116167 nope
24 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 en 0.3822498549987095 0.6877811094452774 0.3309945723997902 0.5962925522774631 nope
25 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 da 0.4517051377915163 0.7658914728682171 0.4030339299921389 0.6806166833916132 nope
26 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 nl 0.4875303727964308 0.7853962600178095 0.4534046979963794 0.7270844266398626 nope
27 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 fr 0.3750315407356979 0.6999393816932714 0.3628389019101708 0.6136670285424017 nope
28 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_F_optimC PolyEmbed_andrea svm F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1095.333437204361 de 0.355059356514748 0.7046466085098807 0.33834564366266284 0.6299245108196094 nope
29 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 it 0.4755443069888554 0.7675079985780305 0.4501140447119437 0.7023435117413848 nope
30 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 pt 0.673303227450142 0.8655002733734279 0.6702445967772233 0.8193963705153853 nope
31 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 sv 0.4189470089118392 0.7236711786068009 0.4198491651634073 0.6314272037990425 nope
32 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 es 0.5178080058189616 0.8268359020852222 0.5104336022388637 0.782714898784318 nope
33 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 en 0.4115752894185112 0.7001869158878504 0.35164720517285003 0.6091191993104883 nope
34 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 da 0.4437869429842064 0.7626499739175796 0.39704879178312197 0.6717100410826179 nope
35 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 nl 0.47635948919429705 0.7874471399955486 0.4589309165206792 0.7292337019755739 nope
36 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 fr 0.39374621795002507 0.7063947733122155 0.3850407928528449 0.6315594797194366 nope
37 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.picklePolyEmbedd_andrea_M_and_F_optimC PolyEmbed_andrea svm M_and_F True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run0.pickle not_binary not_ablation 1251.0414910316467 de 0.3539890425069821 0.7095981751184418 0.3512802070446796 0.6432196317592322 nope
38 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 it 0.5791455159341481 0.8060849214309596 0.6034752340075125 0.7869853576681214 nope
39 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 pt 0.6403974389994276 0.8803876562101505 0.6565213830246649 0.8497743924811387 nope
40 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 sv 0.5032337014290953 0.7768595041322314 0.4719549200388494 0.7364733997369779 nope
41 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 es 0.5200567247634353 0.8529964145466963 0.4908726477090496 0.8285929531854332 nope
42 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 en 0.512424485488998 0.7533647963642719 0.4719843960571978 0.7044441169169227 nope
43 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 da 0.5861231569852233 0.8040595842200032 0.5393761149602847 0.7381233055764151 nope
44 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 nl 0.6072184716496147 0.8335123523093448 0.5845309357041368 0.8020267337813639 nope
45 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 fr 0.4923294612439038 0.7854697603651578 0.4713782273939219 0.7329001302478475 nope
46 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_None_optimC PolyEmbed_andrea svm None True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 161.2168996334076 de 0.4709904181031267 0.7457793804294378 0.4465581491449931 0.7046844416244138 nope
47 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 it 0.575387626645539 0.8064243448858833 0.5958411838194531 0.7790018114269683 nope
48 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 pt 0.653004040098633 0.8791937747161628 0.6559210761775208 0.8482450061614855 nope
49 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 sv 0.49944915222086167 0.7789179104477612 0.4604673876743342 0.727778938054739 nope
50 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 es 0.5144474487169811 0.8559087767795439 0.48397711649967695 0.8222692824953204 nope
51 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 en 0.5160737755179508 0.755674709562109 0.45961112517260677 0.6921096138985132 nope
52 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 da 0.5875776383868945 0.8015873015873016 0.5367286265015276 0.7288571047461061 nope
53 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 nl 0.6079883230969934 0.8363004776378636 0.5828217771858487 0.7968282071156207 nope
54 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 fr 0.4966338770370634 0.7860696517412935 0.46250527724325174 0.7292650668002159 nope
55 rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.picklePolyEmbedd_andrea_M_optimC PolyEmbed_andrea svm M True rcv1-2_doclist_trByLang1000_teByLang1000_processed_run1.pickle not_binary not_ablation 497.6823613643646 de 0.4675732669000923 0.7479187479187479 0.43767984457683634 0.69653035770654 nope