Compare commits

...

22 Commits

Author SHA1 Message Date
Alejandro Moreo Fernandez 6f3f103b3b committing last changes before creating a branch 2021-10-13 11:53:19 +02:00
Alejandro Moreo Fernandez 4572ec266d adding multi-label classification methods 2021-09-02 11:07:33 +02:00
Alejandro Moreo Fernandez dc2fa05cf8 launching experiments 2021-08-29 11:03:51 +02:00
Alejandro Moreo Fernandez 13eb682e53 adding tables 2021-08-27 14:01:01 +02:00
Alejandro Moreo Fernandez daba2c9fb4 adding tables generation 2021-08-27 13:57:33 +02:00
Alejandro Moreo Fernandez aeb0fcf84b adding tables generation 2021-08-27 13:57:26 +02:00
Alejandro Moreo Fernandez db1dbe2534 parallelizing stuff 2021-08-27 12:21:53 +02:00
Alejandro Moreo Fernandez b941c0665e preparing some experiments 2021-08-26 17:57:01 +02:00
Alejandro Moreo Fernandez d6abc7ac85 refactor 2021-08-26 15:52:35 +02:00
Alejandro Moreo Fernandez d040b2acb6 merged! 2021-08-25 17:10:24 +02:00
Alejandro Moreo Fernandez c6de5a043d mlq 2021-08-25 17:08:06 +02:00
Alejandro Moreo Fernandez ab746eed8d last updates 2021-08-02 11:08:52 +02:00
Alejandro Moreo Fernandez 60b6fa3c12 new methods, some experiments added 2021-07-06 18:26:05 +02:00
Alejandro Moreo Fernandez 7b8e6462ff refactoring, chain-classifiers, speeding up for aggregative methods, evaluation modularized 2021-07-06 16:56:54 +02:00
Alejandro Moreo Fernandez a4fea89122 trying stuff with multilabels 2021-07-05 19:17:29 +02:00
Alejandro Moreo Fernandez 6eac620f22 merging 2021-07-05 09:36:31 +02:00
Alejandro Moreo Fernandez 977599b9b1 cleaning branch 2021-07-05 09:15:36 +02:00
Alejandro Moreo Fernandez b94dc11ea8 ensembles runing with gridsearchQ in mode npp 2021-07-04 11:30:39 +02:00
Alejandro Moreo Fernandez f96469da18 adding tweetsentnnp a gitea 2021-07-04 11:24:26 +02:00
Alejandro Moreo Fernandez 1b20bf14ea exploring multilabel quantification 2021-07-02 17:33:05 +02:00
Alejandro Moreo Fernandez f0e93692cc fixing quanet 2021-07-02 10:19:00 +02:00
Alejandro Moreo Fernandez 75a95adfa6 copying files from tweetsent branch 2021-06-29 14:37:26 +02:00
36 changed files with 3579 additions and 113 deletions

38
MultiLabel/NOTES.txt Normal file

@@ -0,0 +1,38 @@
Classifiers
- Binary, single-label classifiers, wrapped with OneVsRest or MultiOutput (see the sketch below):
- LR
- LinearSVC (?)
- Classifiers natively multi-label:
- from scikit-multilearn (x11)
-
Protocols:
- NPP
- APP (for each class)
Things to test:
- MultiChain for classification, MultiChain for regression...
- Reimplement stacking with sklearn.ensemble.StackingClassifier? It does not seem easy.
- Independent classifiers + independent quantifiers
- Stacking + independent quantifiers
- ClassifierChain + independent quantifiers
- Independent quantifiers + cross-class regression (independent?)
- Stacking + cross-class regression
- ClassifierChain + cross-class regression
- Covariates (Means, CovMatrix from samples) + multioutput regression?
- Covariates concatenated with quantifier predictions + cross-class regression?
- Model Selection for specific protocols?
TODO:
- decide methods
- decide classifiers binary
- decide classifiers multi-label
- decide quantifiers naive
- decide quantifiers multi-label
- decide datasets
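
Illustrative aside (not part of NOTES.txt): a minimal scikit-learn sketch of the two classifier families listed above; the dataset and solver settings are placeholders.

# illustrative only: one-vs-rest vs. classifier chains over a binary base learner
from sklearn.datasets import make_multilabel_classification
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import ClassifierChain

X, Y = make_multilabel_classification(n_samples=200, n_classes=5, random_state=0)

# family 1: one independent binary problem per class (LR as base classifier)
ovr = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X, Y)

# family 2: chained binary problems, each also fed the previous labels' predictions
chain = ClassifierChain(LogisticRegression(max_iter=1000)).fit(X, Y)

print(ovr.predict(X[:3]))
print(chain.predict(X[:3]))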

0
MultiLabel/__init__.py Normal file

0
MultiLabel/data/__init__.py Executable file

229
MultiLabel/data/dataset.py Executable file

@@ -0,0 +1,229 @@
import os,sys
from sklearn.datasets import get_data_home, fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from MultiLabel.data.jrcacquis_reader import fetch_jrcacquis, JRCAcquis_Document
from MultiLabel.data.ohsumed_reader import fetch_ohsumed50k
from MultiLabel.data.reuters21578_reader import fetch_reuters21578
from MultiLabel.data.rcv_reader import fetch_RCV1
from MultiLabel.data.wipo_reader import fetch_WIPOgamma, WipoGammaDocument
import pickle
import numpy as np
from tqdm import tqdm
from os.path import join
import re
def init_vectorizer():
return TfidfVectorizer(min_df=5, sublinear_tf=True)
class Dataset:
dataset_available = {'reuters21578', '20newsgroups', 'ohsumed', 'rcv1', 'jrcall',
'wipo-sl-mg','wipo-ml-mg','wipo-sl-sc','wipo-ml-sc'}
def __init__(self, name):
assert name in Dataset.dataset_available, f'dataset {name} is not available'
if name=='reuters21578':
self._load_reuters()
elif name == '20newsgroups':
self._load_20news()
elif name == 'rcv1':
self._load_rcv1()
elif name == 'ohsumed':
self._load_ohsumed()
elif name == 'jrcall':
self._load_jrc(version='all')
elif name == 'wipo-sl-mg':
self._load_wipo('singlelabel', 'maingroup')
elif name == 'wipo-ml-mg':
self._load_wipo('multilabel', 'maingroup')
elif name == 'wipo-sl-sc':
self._load_wipo('singlelabel', 'subclass')
elif name == 'wipo-ml-sc':
self._load_wipo('multilabel', 'subclass')
self.nC = self.devel_labelmatrix.shape[1]
self._vectorizer = init_vectorizer()
self._vectorizer.fit(self.devel_raw)
self.vocabulary = self._vectorizer.vocabulary_
def show(self):
nTr_docs = len(self.devel_raw)
nTe_docs = len(self.test_raw)
nfeats = len(self._vectorizer.vocabulary_)
nC = self.devel_labelmatrix.shape[1]
nD=nTr_docs+nTe_docs
print(f'{self.classification_type}, nD={nD}=({nTr_docs}+{nTe_docs}), nF={nfeats}, nC={nC}')
return self
def _load_reuters(self):
data_path = os.path.join(get_data_home(), 'reuters21578')
devel = fetch_reuters21578(subset='train', data_path=data_path)
test = fetch_reuters21578(subset='test', data_path=data_path)
self.classification_type = 'multilabel'
self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data)
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target)
self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix
def _load_rcv1(self):
data_path = '../datasets/RCV1-v2/unprocessed_corpus' #TODO: check when missing
devel = fetch_RCV1(subset='train', data_path=data_path)
test = fetch_RCV1(subset='test', data_path=data_path)
self.classification_type = 'multilabel'
self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data)
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target)
self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix
def _load_jrc(self, version):
assert version in ['300','all'], 'allowed versions are "300" or "all"'
data_path = "../datasets/JRC_Acquis_v3"
tr_years=list(range(1986, 2006))
te_years=[2006]
if version=='300':
training_docs, tr_cats = fetch_jrcacquis(data_path=data_path, years=tr_years, cat_threshold=1,most_frequent=300)
test_docs, te_cats = fetch_jrcacquis(data_path=data_path, years=te_years, cat_filter=tr_cats)
else:
training_docs, tr_cats = fetch_jrcacquis(data_path=data_path, years=tr_years, cat_threshold=1)
test_docs, te_cats = fetch_jrcacquis(data_path=data_path, years=te_years, cat_filter=tr_cats)
print(f'load jrc-acquis (English) with {len(tr_cats)} tr categories ({len(te_cats)} te categories)')
devel_data = JRCAcquis_Document.get_text(training_docs)
test_data = JRCAcquis_Document.get_text(test_docs)
devel_target = JRCAcquis_Document.get_target(training_docs)
test_target = JRCAcquis_Document.get_target(test_docs)
self.classification_type = 'multilabel'
self.devel_raw, self.test_raw = mask_numbers(devel_data), mask_numbers(test_data)
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel_target, test_target)
self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix
def _load_ohsumed(self):
data_path = os.path.join(get_data_home(), 'ohsumed50k')
devel = fetch_ohsumed50k(subset='train', data_path=data_path)
test = fetch_ohsumed50k(subset='test', data_path=data_path)
self.classification_type = 'multilabel'
self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data)
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel.target, test.target)
self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix
def _load_20news(self):
metadata = ('headers', 'footers', 'quotes')
devel = fetch_20newsgroups(subset='train', remove=metadata)
test = fetch_20newsgroups(subset='test', remove=metadata)
self.classification_type = 'singlelabel'
self.devel_raw, self.test_raw = mask_numbers(devel.data), mask_numbers(test.data)
self.devel_target, self.test_target = devel.target, test.target
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1,1), self.test_target.reshape(-1,1))
def _load_fasttext_data(self,name):
data_path='../datasets/fastText'
self.classification_type = 'singlelabel'
name=name.replace('-','_')
train_file = join(data_path,f'{name}.train')
assert os.path.exists(train_file), f'file {name} not found, please place the fasttext data in {data_path}' #' or specify the path' #todo
self.devel_raw, self.devel_target = load_fasttext_format(train_file)
self.test_raw, self.test_target = load_fasttext_format(join(data_path, f'{name}.test'))
self.devel_raw = mask_numbers(self.devel_raw)
self.test_raw = mask_numbers(self.test_raw)
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1, 1), self.test_target.reshape(-1, 1))
def _load_wipo(self, classmode, classlevel):
assert classmode in {'singlelabel', 'multilabel'}, 'available class_mode are sl (single-label) or ml (multi-label)'
data_path = '../datasets/WIPO/wipo-gamma/en'
data_proc = '../datasets/WIPO-extracted'
devel = fetch_WIPOgamma(subset='train', classification_level=classlevel, data_home=data_path, extracted_path=data_proc, text_fields=['abstract'])
test = fetch_WIPOgamma(subset='test', classification_level=classlevel, data_home=data_path, extracted_path=data_proc, text_fields=['abstract'])
devel_data = [d.text for d in devel]
test_data = [d.text for d in test]
self.devel_raw, self.test_raw = mask_numbers(devel_data), mask_numbers(test_data)
self.classification_type = classmode
if classmode== 'multilabel':
devel_target = [d.all_labels for d in devel]
test_target = [d.all_labels for d in test]
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(devel_target, test_target)
self.devel_target, self.test_target = self.devel_labelmatrix, self.test_labelmatrix
else:
devel_target = [d.main_label for d in devel]
test_target = [d.main_label for d in test]
# only for labels with at least one training document
class_id = {labelname:index for index,labelname in enumerate(sorted(set(devel_target)))}
devel_target = np.array([class_id[id] for id in devel_target]).astype(int)
test_target = np.array([class_id.get(id,None) for id in test_target])
if None in test_target:
print(f'deleting {(test_target==None).sum()} test documents without valid categories')
keep_pos = test_target!=None
self.test_raw = (np.asarray(self.test_raw)[keep_pos]).tolist()
test_target = test_target[keep_pos]
test_target=test_target.astype(int)
self.devel_target, self.test_target = devel_target, test_target
self.devel_labelmatrix, self.test_labelmatrix = _label_matrix(self.devel_target.reshape(-1, 1), self.test_target.reshape(-1, 1))
def vectorize(self):
if not hasattr(self, 'Xtr') or not hasattr(self, 'Xte'):
self.Xtr = self._vectorizer.transform(self.devel_raw)
self.Xte = self._vectorizer.transform(self.test_raw)
self.Xtr.sort_indices()
self.Xte.sort_indices()
return self.Xtr, self.Xte
def analyzer(self):
return self._vectorizer.build_analyzer()
@classmethod
def load(cls, dataset_name, pickle_path=None):
if pickle_path:
if os.path.exists(pickle_path):
print(f'loading pickled dataset from {pickle_path}')
dataset = pickle.load(open(pickle_path, 'rb'))
else:
print(f'fetching dataset and dumping it into {pickle_path}')
dataset = Dataset(name=dataset_name)
print('vectorizing for faster processing')
dataset.vectorize()
print('dumping')
pickle.dump(dataset, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
else:
print(f'loading dataset {dataset_name}')
dataset = Dataset(name=dataset_name)
print('[Done]')
return dataset
def _label_matrix(tr_target, te_target):
mlb = MultiLabelBinarizer(sparse_output=True)
ytr = mlb.fit_transform(tr_target)
yte = mlb.transform(te_target)
print(mlb.classes_)
return ytr, yte
def load_fasttext_format(path):
print(f'loading {path}')
labels,docs=[],[]
for line in tqdm(open(path, 'rt').readlines()):
space = line.strip().find(' ')
label = int(line[:space].replace('__label__',''))-1
labels.append(label)
docs.append(line[space+1:])
labels=np.asarray(labels,dtype=int)
return docs,labels
def mask_numbers(data, number_mask='numbermask'):
mask = re.compile(r'\b[0-9][0-9.,-]*\b')
masked = []
for text in tqdm(data, desc='masking numbers'):
masked.append(mask.sub(number_mask, text))
return masked
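
Illustrative aside (not part of the diff): typical usage of the Dataset class above; the pickle path is a placeholder.

from MultiLabel.data.dataset import Dataset

# fetch (or unpickle) the corpus, fit the tf-idf vectorizer on the devel set,
# and obtain the vectorized train/test matrices
dataset = Dataset.load('reuters21578', pickle_path='./pickles/reuters21578.pickle')
Xtr, Xte = dataset.vectorize()
dataset.show()  # prints classification type, nD, nF, nC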

263
MultiLabel/data/jrcacquis_reader.py Executable file

@@ -0,0 +1,263 @@
import os, sys
from os.path import join
import tarfile
import xml.etree.ElementTree as ET
from sklearn.datasets import get_data_home
import pickle
import rdflib
from rdflib.namespace import RDF, SKOS
from rdflib import URIRef
import zipfile
from collections import Counter
from tqdm import tqdm
from random import shuffle
from util.file import *
class JRCAcquis_Document:
def __init__(self, id, name, lang, year, head, body, categories):
self.id = id
self.parallel_id = name
self.lang = lang
self.year = year
self.text = body if not head else head + "\n" + body
self.categories = categories
@classmethod
def get_text(cls, jrc_documents):
return [d.text for d in jrc_documents]
@classmethod
def get_target(cls, jrc_documents):
return [d.categories for d in jrc_documents]
# this is a workaround... for some reason, acute accents are encoded in a non-standard manner in titles
# however, it seems that the title is often appearing as the first paragraph in the text/body (with
# standard codification), so it might be preferable not to read the header after all (as here by default)
def _proc_acute(text):
for ch in ['a','e','i','o','u']:
text = text.replace('%'+ch+'acute%',ch)
return text
def parse_document(file, year, head=False):
root = ET.parse(file).getroot()
doc_name = root.attrib['n'] # e.g., '22006A0211(01)'
doc_lang = root.attrib['lang'] # e.g., 'es'
doc_id = root.attrib['id'] # e.g., 'jrc22006A0211_01-es'
doc_categories = [cat.text for cat in root.findall('.//teiHeader/profileDesc/textClass/classCode[@scheme="eurovoc"]')]
doc_head = _proc_acute(root.find('.//text/body/head').text) if head else ''
doc_body = '\n'.join([p.text for p in root.findall('.//text/body/div[@type="body"]/p')])
def raise_if_empty(field, from_file):
if isinstance(field, str):
if not field.strip():
raise ValueError("Empty field in file %s" % from_file)
raise_if_empty(doc_name, file)
raise_if_empty(doc_lang, file)
raise_if_empty(doc_id, file)
if head: raise_if_empty(doc_head, file)
raise_if_empty(doc_body, file)
return JRCAcquis_Document(id=doc_id, name=doc_name, lang=doc_lang, year=year, head=doc_head, body=doc_body, categories=doc_categories)
#filters out documents that do not contain any category in the cat_filter list, and removes all labels not in cat_filter
def _filter_by_category(doclist, cat_filter):
if not isinstance(cat_filter, frozenset):
cat_filter = frozenset(cat_filter)
filtered = []
for doc in doclist:
doc.categories = list(cat_filter & set(doc.categories))
if doc.categories:
doc.categories.sort()
filtered.append(doc)
print("filtered %d documents out without categories in the filter list" % (len(doclist) - len(filtered)))
return filtered
#filters out categories with fewer than cat_threshold documents (and drops documents left with no remaining category)
def _filter_by_frequency(doclist, cat_threshold):
cat_count = Counter()
for d in doclist:
cat_count.update(d.categories)
freq_categories = [cat for cat,count in cat_count.items() if count>cat_threshold]
freq_categories.sort()
return _filter_by_category(doclist, freq_categories), freq_categories
#selects the most_frequent top categories (and drops documents left with no remaining category)
def _most_common(doclist, most_frequent):
cat_count = Counter()
for d in doclist:
cat_count.update(d.categories)
freq_categories = [cat for cat,count in cat_count.most_common(most_frequent)]
freq_categories.sort()
return _filter_by_category(doclist, freq_categories), freq_categories
def _get_categories(request):
final_cats = set()
for d in request:
final_cats.update(d.categories)
return list(final_cats)
def fetch_jrcacquis(lang='en', data_path=None, years=None, ignore_unclassified=True,
cat_filter=None, cat_threshold=0, most_frequent=-1,
DOWNLOAD_URL_BASE ='http://optima.jrc.it/Acquis/JRC-Acquis.3.0/corpus/'):
if not data_path:
data_path = get_data_home()
if not os.path.exists(data_path):
os.mkdir(data_path)
request = []
total_read = 0
file_name = 'jrc-' + lang + '.tgz'
archive_path = join(data_path, file_name)
if not os.path.exists(archive_path):
print("downloading language-specific dataset (once and for all) into %s" % data_path)
DOWNLOAD_URL = join(DOWNLOAD_URL_BASE, file_name)
download_file(DOWNLOAD_URL, archive_path)
print("untarring dataset...")
tarfile.open(archive_path, 'r:gz').extractall(data_path)
documents_dir = join(data_path, lang)
print("Reading documents...")
read = 0
for dir in list_dirs(documents_dir):
year = int(dir)
if years is None or year in years:
year_dir = join(documents_dir,dir)
l_y_documents = []
all_documents = list_files(year_dir)
empty = 0
pbar = tqdm(enumerate(all_documents))
for i,doc_file in pbar:
try:
jrc_doc = parse_document(join(year_dir, doc_file), year)
except ValueError:
jrc_doc = None
if jrc_doc and (not ignore_unclassified or jrc_doc.categories):
l_y_documents.append(jrc_doc)
else: empty += 1
read+=1
pbar.set_description(f'from {year_dir}: discarded {empty} without categories or empty fields')
request += l_y_documents
print("Read %d documents for language %s\n" % (read, lang))
total_read += read
final_cats = _get_categories(request)
if cat_filter:
request = _filter_by_category(request, cat_filter)
final_cats = _get_categories(request)
if cat_threshold > 0:
request, final_cats = _filter_by_frequency(request, cat_threshold)
if most_frequent != -1 and len(final_cats) > most_frequent:
request, final_cats = _most_common(request, most_frequent)
return request, final_cats
def print_cat_analysis(request):
cat_count = Counter()
for d in request:
cat_count.update(d.categories)
print("Number of active categories: {}".format(len(cat_count)))
print(cat_count.most_common())
# inspects the Eurovoc thesaurus in order to select a subset of categories
# implemented policies: 'all', 'broadest' (categories with no parent category), and 'leaves' (categories not broader than any other concept)
def inspect_eurovoc(data_path, eurovoc_skos_core_concepts_filename='eurovoc_in_skos_core_concepts.rdf',
eurovoc_url="http://publications.europa.eu/mdr/resource/thesaurus/eurovoc-20160630-0/skos/eurovoc_in_skos_core_concepts.zip",
select="broadest"):
fullpath_pickle = join(data_path, select+'_concepts.pickle')
if os.path.exists(fullpath_pickle):
print("Pickled object found in %s. Loading it." % fullpath_pickle)
return pickle.load(open(fullpath_pickle,'rb'))
fullpath = join(data_path, eurovoc_skos_core_concepts_filename)
if not os.path.exists(fullpath):
zip_path = fullpath + '.zip'
print("Path %s does not exist. Trying to download the SKOS EuroVoc file from %s" % (fullpath, eurovoc_url))
download_file(eurovoc_url, zip_path)
print("Unzipping file...")
zipped = zipfile.ZipFile(zip_path, 'r')
zipped.extract("eurovoc_in_skos_core_concepts.rdf", data_path)
zipped.close()
print("Parsing %s" %fullpath)
g = rdflib.Graph()
g.parse(location=fullpath, format="application/rdf+xml")
if select == "all":
print("Selecting all concepts")
all_concepts = list(g.subjects(RDF.type, SKOS.Concept))
all_concepts = [c.toPython().split('/')[-1] for c in all_concepts]
all_concepts.sort()
selected_concepts = all_concepts
elif select=="broadest":
print("Selecting broadest concepts (those without any other broader concept linked to it)")
all_concepts = set(g.subjects(RDF.type, SKOS.Concept))
narrower_concepts = set(g.subjects(SKOS.broader, None))
broadest_concepts = [c.toPython().split('/')[-1] for c in (all_concepts - narrower_concepts)]
broadest_concepts.sort()
selected_concepts = broadest_concepts
elif select=="leaves":
print("Selecting leaves concepts (those not linked as broader of any other concept)")
all_concepts = set(g.subjects(RDF.type, SKOS.Concept))
broad_concepts = set(g.objects(None, SKOS.broader))
leave_concepts = [c.toPython().split('/')[-1] for c in (all_concepts - broad_concepts)]
leave_concepts.sort()
selected_concepts = leave_concepts
else:
raise ValueError("Selection policy %s is not currently supported" % select)
print("%d %s concepts found" % (len(selected_concepts), leave_concepts))
print("Pickling concept list for faster further requests in %s" % fullpath_pickle)
pickle.dump(selected_concepts, open(fullpath_pickle, 'wb'), pickle.HIGHEST_PROTOCOL)
return selected_concepts
if __name__ == '__main__':
# example code
train_years = list(range(1986, 2006))
test_years = [2006]
cat_policy = 'all' #'leaves'
most_common_cat = 300
JRC_DATAPATH = "../datasets/JRC_Acquis_v3"
cat_list = inspect_eurovoc(JRC_DATAPATH, select=cat_policy)
training_docs, tr_cats = fetch_jrcacquis(lang='en', data_path=JRC_DATAPATH, years=train_years,
cat_filter=None, cat_threshold=1,
most_frequent=most_common_cat)
test_docs, te_cats = fetch_jrcacquis(lang='en', data_path=JRC_DATAPATH, years=test_years,
cat_filter=tr_cats, cat_threshold=1)
# training_cats = jrc_get_categories(training_docs)
# test_cats = jrc_get_categories(test_docs)
# intersection_cats = [c for c in training_cats if c in test_cats]
# training_docs = jrc_filter_by_category(training_docs, intersection_cats)
# test_docs = jrc_filter_by_category(test_docs, intersection_cats)
print(f'JRC-train: {len(training_docs)} documents')
print(f'JRC-test: {len(test_docs)} documents')
print_cat_analysis(training_docs)
print_cat_analysis(test_docs)
"""
JRC-train: 12615 documents, 300 cats
JRC-test: 7055 documents, 300 cats
"""

5
MultiLabel/data/labeled.py Executable file

@@ -0,0 +1,5 @@
class LabelledDocuments:
def __init__(self, data, target, target_names):
self.data=data
self.target=target
self.target_names=target_names
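
Illustrative aside (not part of the diff): the container mirrors the Bunch-style objects returned by scikit-learn loaders; a hypothetical instantiation:

# hypothetical example: two documents over three categories
docs = LabelledDocuments(
    data=['oil prices rise', 'corn harvest starts'],
    target=[[2], [0, 1]],                # per-document lists of class indices
    target_names=['acq', 'corn', 'earn'])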

63
MultiLabel/data/ohsumed_reader.py Executable file

@@ -0,0 +1,63 @@
import os
import pickle
import tarfile
from os.path import join
import urllib.request
from data.labeled import LabelledDocuments
from util.file import create_if_not_exist, download_file_if_not_exists
import math
def fetch_ohsumed50k(data_path=None, subset='train', train_test_split=0.7):
_dataname = 'ohsumed50k'
if data_path is None:
data_path = join(os.path.expanduser('~'), _dataname)
create_if_not_exist(data_path)
pickle_file = join(data_path, _dataname + '.' + subset + str(train_test_split) + '.pickle')
if not os.path.exists(pickle_file):
DOWNLOAD_URL = ('http://disi.unitn.it/moschitti/corpora/ohsumed-all-docs.tar.gz')
archive_path = os.path.join(data_path, 'ohsumed-all-docs.tar.gz')
download_file_if_not_exists(DOWNLOAD_URL, archive_path)
untardir = 'ohsumed-all'
if not os.path.exists(os.path.join(data_path, untardir)):
print("untarring ohsumed...")
tarfile.open(archive_path, 'r:gz').extractall(data_path)
target_names = []
doc_classes = dict()
class_docs = dict()
content = dict()
doc_ids = set()
for cat_id in os.listdir(join(data_path, untardir)):
target_names.append(cat_id)
class_docs[cat_id] = []
for doc_id in os.listdir(join(data_path, untardir, cat_id)):
doc_ids.add(doc_id)
text_content = open(join(data_path, untardir, cat_id, doc_id), 'r').read()
if doc_id not in doc_classes: doc_classes[doc_id] = []
doc_classes[doc_id].append(cat_id)
if doc_id not in content: content[doc_id] = text_content
class_docs[cat_id].append(doc_id)
target_names.sort()
print('Read %d different documents' % len(doc_ids))
splitdata = dict({'train': [], 'test': []})
for cat_id in target_names:
free_docs = [d for d in class_docs[cat_id] if (d not in splitdata['train'] and d not in splitdata['test'])]
if len(free_docs) > 0:
split_point = int(math.floor(len(free_docs) * train_test_split))
splitdata['train'].extend(free_docs[:split_point])
splitdata['test'].extend(free_docs[split_point:])
for split in ['train', 'test']:
dataset = LabelledDocuments([], [], target_names)
for doc_id in splitdata[split]:
dataset.data.append(content[doc_id])
dataset.target.append([target_names.index(cat_id) for cat_id in doc_classes[doc_id]])
pickle.dump(dataset,
open(join(data_path, _dataname + '.' + split + str(train_test_split) + '.pickle'), 'wb'),
protocol=pickle.HIGHEST_PROTOCOL)
print(pickle_file)
return pickle.load(open(pickle_file, 'rb'))
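
Illustrative aside (not part of the diff): typical usage of the reader above; the first call downloads the archive and builds the pickles under the user's home directory.

train = fetch_ohsumed50k(subset='train')  # 70/30 train/test split by default
test = fetch_ohsumed50k(subset='test')
print(f'{len(train.data)} training docs, {len(train.target_names)} categories')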

152
MultiLabel/data/rcv_reader.py Executable file

@@ -0,0 +1,152 @@
from zipfile import ZipFile
import xml.etree.ElementTree as ET
from data.labeled import LabelledDocuments
from util.file import list_files
from os.path import join, exists
from util.file import download_file_if_not_exists
import re
from collections import Counter
RCV1_TOPICHIER_URL = "http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a02-orig-topics-hierarchy/rcv1.topics.hier.orig"
RCV1_BASE_URL = "http://www.daviddlewis.com/resources/testcollections/rcv1/"
rcv1_test_data_gz = ['lyrl2004_tokens_test_pt0.dat.gz',
'lyrl2004_tokens_test_pt1.dat.gz',
'lyrl2004_tokens_test_pt2.dat.gz',
'lyrl2004_tokens_test_pt3.dat.gz']
rcv1_train_data_gz = ['lyrl2004_tokens_train.dat.gz']
rcv1_doc_cats_data_gz = 'rcv1-v2.topics.qrels.gz'
class RCV_Document:
def __init__(self, id, text, categories, date=''):
self.id = id
self.date = date
self.text = text
self.categories = categories
class IDRangeException(Exception): pass
nwords = []
def parse_document(xml_content, valid_id_range=None):
root = ET.fromstring(xml_content)
doc_id = root.attrib['itemid']
if valid_id_range is not None:
if not valid_id_range[0] <= int(doc_id) <= valid_id_range[1]:
raise IDRangeException
doc_categories = [cat.attrib['code'] for cat in
root.findall('.//metadata/codes[@class="bip:topics:1.0"]/code')]
doc_date = root.attrib['date']
doc_title = root.find('.//title').text
doc_headline = root.find('.//headline').text
doc_body = '\n'.join([p.text for p in root.findall('.//text/p')])
if not doc_body:
raise ValueError('Empty document')
if doc_title is None: doc_title = ''
if doc_headline is None or doc_headline in doc_title: doc_headline = ''
text = '\n'.join([doc_title, doc_headline, doc_body]).strip()
return RCV_Document(id=doc_id, text=text, categories=doc_categories, date=doc_date)
def fetch_RCV1(data_path, subset='all'):
assert subset in ['train', 'test', 'all'], 'split should either be "train", "test", or "all"'
request = []
labels = set()
read_documents = 0
training_documents = 23149
test_documents = 781265
if subset == 'all':
split_range = (2286, 810596)
expected = training_documents+test_documents
elif subset == 'train':
split_range = (2286, 26150)
expected = training_documents
else:
split_range = (26151, 810596)
expected = test_documents
# global nwords
# nwords=[]
for part in list_files(data_path):
if not re.match(r'\d+\.zip', part): continue
target_file = join(data_path, part)
assert exists(target_file), \
"You don't seem to have the file "+part+" in " + data_path + ", and the RCV1 corpus can not be downloaded"+\
" w/o a formal permission. Please, refer to " + RCV1_BASE_URL + " for more information."
zipfile = ZipFile(target_file)
for xmlfile in zipfile.namelist():
xmlcontent = zipfile.open(xmlfile).read()
try:
doc = parse_document(xmlcontent, valid_id_range=split_range)
labels.update(doc.categories)
request.append(doc)
read_documents += 1
except (IDRangeException,ValueError) as e:
pass
print('\r[{}] read {} documents'.format(part, len(request)), end='')
if read_documents == expected: break
if read_documents == expected: break
print()
# print('ave:{} std {} min {} max {}'.format(np.mean(nwords), np.std(nwords), np.min(nwords), np.max(nwords)))
return LabelledDocuments(data=[d.text for d in request], target=[d.categories for d in request], target_names=list(labels))
def fetch_topic_hierarchy(path, topics='all'):
assert topics in ['all', 'leaves']
download_file_if_not_exists(RCV1_TOPICHIER_URL, path)
hierarchy = {}
for line in open(path, 'rt'):
parts = line.strip().split()
parent,child = parts[1],parts[3]
if parent not in hierarchy:
hierarchy[parent]=[]
hierarchy[parent].append(child)
del hierarchy['None']
del hierarchy['Root']
print(hierarchy)
if topics=='all':
topics = set(hierarchy.keys())
for parent in hierarchy.keys():
topics.update(hierarchy[parent])
return list(topics)
elif topics=='leaves':
parents = set(hierarchy.keys())
childs = set()
for parent in hierarchy.keys():
childs.update(hierarchy[parent])
return list(childs.difference(parents))
if __name__=='__main__':
# example
RCV1_PATH = '../../datasets/RCV1-v2/unprocessed_corpus'
rcv1_train = fetch_RCV1(RCV1_PATH, subset='train')
rcv1_test = fetch_RCV1(RCV1_PATH, subset='test')
print('read {} documents in rcv1-train, and {} labels'.format(len(rcv1_train.data), len(rcv1_train.target_names)))
print('read {} documents in rcv1-test, and {} labels'.format(len(rcv1_test.data), len(rcv1_test.target_names)))
cats = Counter()
for doc_cats in rcv1_train.target: cats.update(doc_cats)
print('RCV1', cats)

189
MultiLabel/data/reuters21578_reader.py Executable file

@@ -0,0 +1,189 @@
# Modified version of the code originally implemented by Eustache Diemert <eustache@diemert.fr>
# @FedericoV <https://github.com/FedericoV/>
# with License: BSD 3 clause
import os.path
import re
import tarfile
from sklearn.datasets import get_data_home
from six.moves import html_parser
from six.moves import urllib
import pickle
from glob import glob
import numpy as np
from data.labeled import LabelledDocuments
def _not_in_sphinx():
# Hack to detect whether we are running by the sphinx builder
return '__file__' in globals()
class ReutersParser(html_parser.HTMLParser):
"""Utility class to parse a SGML file and yield documents one at a time."""
def __init__(self, encoding='latin-1', data_path=None):
self.data_path = data_path
self.download_if_not_exist()
self.tr_docs = []
self.te_docs = []
html_parser.HTMLParser.__init__(self)
self._reset()
self.encoding = encoding
self.empty_docs = 0
def handle_starttag(self, tag, attrs):
method = 'start_' + tag
getattr(self, method, lambda x: None)(attrs)
def handle_endtag(self, tag):
method = 'end_' + tag
getattr(self, method, lambda: None)()
def _reset(self):
self.in_title = 0
self.in_body = 0
self.in_topics = 0
self.in_topic_d = 0
self.in_unproc_text = 0
self.title = ""
self.body = ""
self.topics = []
self.topic_d = ""
self.text = ""
def parse(self, fd):
for chunk in fd:
self.feed(chunk.decode(self.encoding))
self.close()
def handle_data(self, data):
if self.in_body:
self.body += data
elif self.in_title:
self.title += data
elif self.in_topic_d:
self.topic_d += data
elif self.in_unproc_text:
self.text += data
def start_reuters(self, attributes):
topic_attr = attributes[0][1]
lewissplit_attr = attributes[1][1]
self.lewissplit = u'unused'
if topic_attr==u'YES':
if lewissplit_attr == u'TRAIN':
self.lewissplit = 'train'
elif lewissplit_attr == u'TEST':
self.lewissplit = 'test'
pass
def end_reuters(self):
self.body = re.sub(r'\s+', r' ', self.body)
if self.lewissplit != u'unused':
parsed_doc = {'title': self.title, 'body': self.body, 'unproc':self.text, 'topics': self.topics}
if (self.title+self.body+self.text).strip() == '':
self.empty_docs += 1
if self.lewissplit == u'train':
self.tr_docs.append(parsed_doc)
elif self.lewissplit == u'test':
self.te_docs.append(parsed_doc)
self._reset()
def start_title(self, attributes):
self.in_title = 1
def end_title(self):
self.in_title = 0
def start_body(self, attributes):
self.in_body = 1
def end_body(self):
self.in_body = 0
def start_topics(self, attributes):
self.in_topics = 1
def end_topics(self):
self.in_topics = 0
def start_text(self, attributes):
if len(attributes)>0 and attributes[0][1] == u'UNPROC':
self.in_unproc_text = 1
def end_text(self):
self.in_unproc_text = 0
def start_d(self, attributes):
self.in_topic_d = 1
def end_d(self):
if self.in_topics:
self.topics.append(self.topic_d)
self.in_topic_d = 0
self.topic_d = ""
def download_if_not_exist(self):
DOWNLOAD_URL = ('http://archive.ics.uci.edu/ml/machine-learning-databases/'
'reuters21578-mld/reuters21578.tar.gz')
ARCHIVE_FILENAME = 'reuters21578.tar.gz'
if self.data_path is None:
self.data_path = os.path.join(get_data_home(), "reuters")
if not os.path.exists(self.data_path):
"""Download the dataset."""
print("downloading dataset (once and for all) into %s" % self.data_path)
os.mkdir(self.data_path)
def progress(blocknum, bs, size):
total_sz_mb = '%.2f MB' % (size / 1e6)
current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6)
if _not_in_sphinx():
print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='')
archive_path = os.path.join(self.data_path, ARCHIVE_FILENAME)
urllib.request.urlretrieve(DOWNLOAD_URL, filename=archive_path,
reporthook=progress)
if _not_in_sphinx():
print('\r', end='')
print("untarring Reuters dataset...")
tarfile.open(archive_path, 'r:gz').extractall(self.data_path)
print("done.")
def fetch_reuters21578(data_path=None, subset='train'):
if data_path is None:
data_path = os.path.join(get_data_home(), 'reuters21578')
reuters_pickle_path = os.path.join(data_path, "reuters." + subset + ".pickle")
if not os.path.exists(reuters_pickle_path):
parser = ReutersParser(data_path=data_path)
for filename in glob(os.path.join(data_path, "*.sgm")):
parser.parse(open(filename, 'rb'))
# index category names with a unique numerical code (only considering categories with training examples)
tr_categories = np.unique(np.concatenate([doc['topics'] for doc in parser.tr_docs])).tolist()
def pickle_documents(docs, subset):
for doc in docs:
doc['topics'] = [tr_categories.index(t) for t in doc['topics'] if t in tr_categories]
pickle_docs = {'categories': tr_categories, 'documents': docs}
pickle.dump(pickle_docs, open(os.path.join(data_path, "reuters." + subset + ".pickle"), 'wb'),
protocol=pickle.HIGHEST_PROTOCOL)
return pickle_docs
pickle_tr = pickle_documents(parser.tr_docs, "train")
pickle_te = pickle_documents(parser.te_docs, "test")
# self.sout('Empty docs %d' % parser.empty_docs)
requested_subset = pickle_tr if subset == 'train' else pickle_te
else:
requested_subset = pickle.load(open(reuters_pickle_path, 'rb'))
data = [(u'{title}\n{body}\n{unproc}'.format(**doc), doc['topics']) for doc in requested_subset['documents']]
text_data, topics = zip(*data)
return LabelledDocuments(data=text_data, target=topics, target_names=requested_subset['categories'])
if __name__=='__main__':
reuters_train = fetch_reuters21578(subset='train')
print(reuters_train.data)

280
MultiLabel/data/tsr_function__.py Executable file

@@ -0,0 +1,280 @@
import math
import numpy as np
from scipy.stats import t
from scipy.stats import norm
from joblib import Parallel, delayed
import time
from scipy.sparse import csr_matrix, csc_matrix
STWFUNCTIONS = ['dotn', 'ppmi', 'ig', 'chi2', 'cw', 'wp']
def get_probs(tpr, fpr, pc):
# tpr = p(t|c) = p(tp)/p(c) = p(tp)/(p(tp)+p(fn))
# fpr = p(t|_c) = p(fp)/p(_c) = p(fp)/(p(fp)+p(tn))
pnc = 1.0 - pc
tp = tpr * pc
fn = pc - tp
fp = fpr * pnc
tn = pnc - fp
return ContTable(tp=tp, fn=fn, fp=fp, tn=tn)
def apply_tsr(tpr, fpr, pc, tsr):
cell = get_probs(tpr, fpr, pc)
return tsr(cell)
def positive_information_gain(cell):
if cell.tpr() < cell.fpr():
return 0.0
else:
return information_gain(cell)
def posneg_information_gain(cell):
ig = information_gain(cell)
if cell.tpr() < cell.fpr():
return -ig
else:
return ig
def __ig_factor(p_tc, p_t, p_c):
den = p_t * p_c
if den != 0.0 and p_tc != 0:
return p_tc * math.log(p_tc / den, 2)
else:
return 0.0
def information_gain(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + \
__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) +\
__ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()) + \
__ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())
def information_gain_mod(cell):
return (__ig_factor(cell.p_tp(), cell.p_f(), cell.p_c()) + __ig_factor(cell.p_tn(), cell.p_not_f(), cell.p_not_c())) \
- (__ig_factor(cell.p_fp(), cell.p_f(), cell.p_not_c()) + __ig_factor(cell.p_fn(), cell.p_not_f(), cell.p_c()))
def pointwise_mutual_information(cell):
return __ig_factor(cell.p_tp(), cell.p_f(), cell.p_c())
def gain_ratio(cell):
pc = cell.p_c()
pnc = 1.0 - pc
norm = pc * math.log(pc, 2) + pnc * math.log(pnc, 2)
return information_gain(cell) / (-norm)
def chi_square(cell):
den = cell.p_f() * cell.p_not_f() * cell.p_c() * cell.p_not_c()
if den==0.0: return 0.0
num = gss(cell)**2
return num / den
def relevance_frequency(cell):
a = cell.tp
c = cell.fp
if c == 0: c = 1
return math.log(2.0 + (a * 1.0 / c), 2)
def idf(cell):
if cell.p_f()>0:
return math.log(1.0 / cell.p_f())
return 0.0
def gss(cell):
return cell.p_tp()*cell.p_tn() - cell.p_fp()*cell.p_fn()
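# confidence interval for a proportion (Wilson-like score interval: normal approximation for n>30, Student's t otherwise); used by conf_weight below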
def conf_interval(xt, n):
if n>30:
z2 = 3.84145882069 # norm.ppf(0.5+0.95/2.0)**2
else:
z2 = t.ppf(0.5 + 0.95 / 2.0, df=max(n-1,1)) ** 2
p = (xt + 0.5 * z2) / (n + z2)
amplitude = 0.5 * z2 * math.sqrt((p * (1.0 - p)) / (n + z2))
return p, amplitude
def strength(minPosRelFreq, minPos, maxNeg):
if minPos > maxNeg:
return math.log(2.0 * minPosRelFreq, 2.0)
else:
return 0.0
#set cancel_features=True to allow some features to be weighted as 0 (as in the original article)
#however, for some extremely imbalanced datasets this caused all documents to be weighted 0
def conf_weight(cell, cancel_features=False):
c = cell.get_c()
not_c = cell.get_not_c()
tp = cell.tp
fp = cell.fp
pos_p, pos_amp = conf_interval(tp, c)
neg_p, neg_amp = conf_interval(fp, not_c)
min_pos = pos_p-pos_amp
max_neg = neg_p+neg_amp
den = (min_pos + max_neg)
minpos_relfreq = min_pos / (den if den != 0 else 1)
str_tplus = strength(minpos_relfreq, min_pos, max_neg)
if str_tplus == 0 and not cancel_features:
return 1e-20
return str_tplus
def word_prob(cell):
return cell.tpr()
class ContTable:
def __init__(self, tp=0, tn=0, fp=0, fn=0):
self.tp=tp
self.tn=tn
self.fp=fp
self.fn=fn
def get_d(self): return self.tp + self.tn + self.fp + self.fn
def get_c(self): return self.tp + self.fn
def get_not_c(self): return self.tn + self.fp
def get_f(self): return self.tp + self.fp
def get_not_f(self): return self.tn + self.fn
def p_c(self): return (1.0*self.get_c())/self.get_d()
def p_not_c(self): return 1.0-self.p_c()
def p_f(self): return (1.0*self.get_f())/self.get_d()
def p_not_f(self): return 1.0-self.p_f()
def p_tp(self): return (1.0*self.tp) / self.get_d()
def p_tn(self): return (1.0*self.tn) / self.get_d()
def p_fp(self): return (1.0*self.fp) / self.get_d()
def p_fn(self): return (1.0*self.fn) / self.get_d()
def tpr(self):
c = 1.0*self.get_c()
return self.tp / c if c > 0.0 else 0.0
def fpr(self):
_c = 1.0*self.get_not_c()
return self.fp / _c if _c > 0.0 else 0.0
def round_robin_selection(X, Y, k, tsr_function=positive_information_gain):
print(f'[selecting {k} terms]')
nC = Y.shape[1]
FC = get_tsr_matrix(get_supervised_matrix(X, Y), tsr_function).T
best_features_idx = np.argsort(-FC, axis=0).flatten()
tsr_values = FC.flatten()
selected_indexes_set = set()
selected_indexes = list()
selected_value = list()
from_category = list()
round_robin = iter(best_features_idx)
values_iter = iter(tsr_values)
round=0
while len(selected_indexes) < k:
term_idx = next(round_robin)
term_val = next(values_iter)
if term_idx not in selected_indexes_set:
selected_indexes_set.add(term_idx)
selected_indexes.append(term_idx)
selected_value.append(term_val)
from_category.append(round)
round = (round + 1) % nC
return np.asarray(selected_indexes, dtype=int), np.asarray(selected_value, dtype=float), np.asarray(from_category)
def feature_label_contingency_table(positive_document_indexes, feature_document_indexes, nD):
tp_ = len(positive_document_indexes & feature_document_indexes)
fp_ = len(feature_document_indexes - positive_document_indexes)
fn_ = len(positive_document_indexes - feature_document_indexes)
tn_ = nD - (tp_ + fp_ + fn_)
return ContTable(tp=tp_, tn=tn_, fp=fp_, fn=fn_)
def category_tables(feature_sets, category_sets, c, nD, nF):
return [feature_label_contingency_table(category_sets[c], feature_sets[f], nD) for f in range(nF)]
"""
Computes the nC x nF supervised matrix M where Mcf is the 4-cell contingency table for feature f and class c.
Efficiency O(nF x nC x log(S)) where S is the sparse factor
"""
def get_supervised_matrix(coocurrence_matrix, label_matrix, n_jobs=-1):
nD, nF = coocurrence_matrix.shape
nD2, nC = label_matrix.shape
if nD != nD2:
raise ValueError('Number of rows in coocurrence matrix shape %s and label matrix shape %s is not consistent' %
(coocurrence_matrix.shape,label_matrix.shape))
def nonzero_set(matrix, col):
return set(matrix[:, col].nonzero()[0])
if isinstance(coocurrence_matrix, csr_matrix):
coocurrence_matrix = csc_matrix(coocurrence_matrix)
feature_sets = [nonzero_set(coocurrence_matrix, f) for f in range(nF)]
category_sets = [nonzero_set(label_matrix, c) for c in range(nC)]
cell_matrix = Parallel(n_jobs=n_jobs, backend="threading")(delayed(category_tables)(feature_sets, category_sets, c, nD, nF) for c in range(nC))
return np.array(cell_matrix)
# obtains the matrix T where Tcf=tsr(f,c) is the tsr score for category c and feature f
def get_tsr_matrix(cell_matrix, tsr_score_function):
nC,nF = cell_matrix.shape
tsr_matrix = [[tsr_score_function(cell_matrix[c,f]) for f in range(nF)] for c in range(nC)]
return np.array(tsr_matrix)
""" The Fisher-score [1] is not computed on the 4-cell contingency table, but can
take as input any real-valued feature column (e.g., tf-idf weights).
feat is the feature vector, and c is a binary classification vector.
This implementation covers only the binary case, while the formula is defined for multiclass
single-label scenarios, for which the version [2] might be preferred.
[1] R.O. Duda, P.E. Hart, and D.G. Stork. Pattern classification. Wiley-interscience, 2012.
[2] Gu, Q., Li, Z., & Han, J. (2012). Generalized fisher score for feature selection. arXiv preprint arXiv:1202.3725.
"""
def fisher_score_binary(feat, c):
neg = np.ones_like(c) - c
npos = np.sum(c)
nneg = np.sum(neg)
mupos = np.mean(feat[c == 1])
muneg = np.mean(feat[neg == 1])
mu = np.mean(feat)
stdpos = np.std(feat[c == 1])
stdneg = np.std(feat[neg == 1])
num = npos * ((mupos - mu) ** 2) + nneg * ((muneg - mu) ** 2)
den = npos * (stdpos ** 2) + nneg * (stdneg ** 2)
if den>0:
return num / den
else:
return num
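
Illustrative aside (not part of the diff): a small synthetic example of how the utilities above compose, selecting the top-k features round-robin across classes by positive information gain.

import numpy as np
from scipy.sparse import csr_matrix

rng = np.random.default_rng(0)
X = csr_matrix(rng.integers(0, 2, size=(100, 50)))  # 100 docs x 50 features
Y = csr_matrix(rng.integers(0, 2, size=(100, 4)))   # 100 docs x 4 classes

# pick the 10 best features, alternating across the 4 classes
idx, scores, classes = round_robin_selection(X, Y, k=10)
print(idx, scores, classes, sep='\n')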

212
MultiLabel/data/wipo_reader.py Executable file

@@ -0,0 +1,212 @@
#https://www.wipo.int/classifications/ipc/en/ITsupport/Categorization/dataset/
import os, sys
from os.path import exists, join
from util.file import *
from zipfile import ZipFile
import xml.etree.ElementTree as ET
from tqdm import tqdm
import numpy as np
import pickle
from joblib import Parallel, delayed
WIPO_URL= 'https://www.wipo.int/classifications/ipc/en/ITsupport/Categorization/dataset/'
class WipoGammaDocument:
def __init__(self, id, text, main_label, all_labels):
self.id = id
self.text = text
self.main_label = main_label
self.all_labels = all_labels
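# strips nested <claim-text> tags inside the <claims>...</claims> span so that the claims' text is not fragmented during XML parsing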
def remove_nested_claimtext_tags(xmlcontent):
from_pos = xmlcontent.find(b'<claims')
to_pos = xmlcontent.find(b'</claims>')
if from_pos > -1 and to_pos > -1:
in_between = xmlcontent[from_pos:to_pos].replace(b'<claim-text>',b'').replace(b'</claim-text>',b'')
xmlcontent = (xmlcontent[:from_pos]+in_between+xmlcontent[to_pos:]).strip()
return xmlcontent
def parse_document(xml_content, text_fields, limit_description):
root = ET.fromstring(remove_nested_claimtext_tags(xml_content))
doc_id = root.attrib['ucid']
lang = root.attrib['lang']
#take categories from the categorization up to the "subclass" level
main_group = set(t.text[:6] for t in root.findall('.//bibliographic-data/technical-data/classifications-ipcr/classification-ipcr[@computed="from_ecla_to_ipc_SG"][@generated_main_IPC="true"]'))
sec_groups = set(t.text[:6] for t in root.findall('.//bibliographic-data/technical-data/classifications-ipcr/classification-ipcr[@computed="from_ecla_to_ipc_SG"][@generated_main_IPC="false"]'))
sec_groups.update(main_group)
assert len(main_group) == 1, 'more than one main group'
main_group = list(main_group)[0]
sec_groups = sorted(list(sec_groups))
assert lang == 'EN', f'only English documents allowed (doc {doc_id})'
doc_text_fields=[]
if 'abstract' in text_fields:
abstract = '\n'.join(filter(None, [t.text for t in root.findall('.//abstract[@lang="EN"]/p')]))
doc_text_fields.append(abstract)
if 'description' in text_fields:
description = '\n'.join(filter(None, [t.text for t in root.findall('.//description[@lang="EN"]/p')]))
if limit_description>-1:
description=' '.join(description.split()[:limit_description])
doc_text_fields.append(description)
if 'claims' in text_fields:
claims = '\n'.join(filter(None, [t.text for t in root.findall('.//claims[@lang="EN"]/claim')]))
doc_text_fields.append(claims)
text = '\n'.join(doc_text_fields)
if text:
return WipoGammaDocument(doc_id, text, main_group, sec_groups)
else:
return None
def extract(fin, fout, text_fields, limit_description):
zipfile = ZipFile(fin)
ndocs=0
with open(fout, 'wt') as out:
for xmlfile in tqdm(zipfile.namelist()):
if xmlfile.endswith('.xml'):
xmlcontent = zipfile.open(xmlfile).read()
document = parse_document(xmlcontent, text_fields, limit_description)
if document:
line_text = document.text.replace('\n', ' ').replace('\t', ' ').strip()
assert line_text, f'empty document in {xmlfile}'
all_labels = ' '.join(document.all_labels)
out.write('\t'.join([document.id, document.main_label, all_labels, line_text]))
out.write('\n')
ndocs+=1
out.flush()
def read_classification_file(data_path, classification_level):
assert classification_level in ['subclass', 'maingroup'], 'wrong classification requested'
z = ZipFile(join(data_path,'EnglishWipoGamma1.zip'))
inpath='Wipo_Gamma/English/TrainTestSpits'
document_labels = dict()
train_ids, test_ids = set(), set()
labelcut = LabelCut(classification_level)
for subset in tqdm(['train', 'test'], desc='loading classification file'):
target_subset = train_ids if subset=='train' else test_ids
if classification_level == 'subclass':
file = f'{subset}set_en_sc.parts' #sub-class level
else:
file = f'{subset}set_en_mg.parts' #main-group level
for line in z.open(f'{inpath}/{file}').readlines():
line = line.decode().strip().split(',')
id = line[0]
id = id[id.rfind('/')+1:].replace('.xml','')
labels = labelcut.trim(line[1:])
document_labels[id]=labels
target_subset.add(id)
return document_labels, train_ids, test_ids
class LabelCut:
"""
Labels consist of 1 char for the section, 2 chars for the class, 1 char for the subclass, 2 chars for the main group, and so on.
This class cuts the label at the desired level (4 chars for subclass, or 6 for maingroup)
"""
def __init__(self, classification_level):
assert classification_level in {'subclass','maingroup'}, 'unknown classification level'
if classification_level == 'subclass': self.cut = 4
else: self.cut = 6
def trim(self, label):
if isinstance(label, list):
return sorted(set([l[:self.cut] for l in label]))
else:
return label[:self.cut]
def fetch_WIPOgamma(subset, classification_level, data_home, extracted_path, text_fields = ['abstract', 'description'], limit_description=300):
"""
Fetches the WIPO-gamma dataset
:param subset: 'train' or 'test' split
:param classification_level: the classification level, either 'subclass' or 'maingroup'
:param data_home: directory containing the original 11 English zips
:param extracted_path: directory used to extract and process the original files
:param text_fields: the fields to extract, among 'abstract', 'description', and 'claims'
:param limit_description: the maximum number of words to take from the description field (default 300); set to -1 for all
:return:
"""
assert subset in {"train", "test"}, 'unknown target request (valid ones are "train" or "test")'
assert len(text_fields)>0, 'at least some text field should be indicated'
if not exists(data_home):
raise ValueError(f'{data_home} does not exist, and the dataset cannot be downloaded automatically, '
f'since you need to request permission. Please refer to {WIPO_URL}')
create_if_not_exist(extracted_path)
config = f'{"-".join(text_fields)}'
if 'description' in text_fields: config += f'-{limit_description}'
pickle_path=join(extracted_path, f'wipo-{subset}-{classification_level}-{config}.pickle')
if exists(pickle_path):
print(f'loading pickled file in {pickle_path}')
return pickle.load(open(pickle_path,'rb'))
print('pickle file not found, processing...(this will take some minutes)')
extracted = sum([exists(f'{extracted_path}/EnglishWipoGamma{(i+1)}-{config}.txt') for i in range(11)])==11
if not extracted:
print(f'extraction files not found, extracting files in {data_home}... (this will take some additional minutes)')
Parallel(n_jobs=-1)(
delayed(extract)(
join(data_home, file), join(extracted_path, file.replace('.zip', f'-{config}.txt')), text_fields, limit_description
)
for file in list_files(data_home)
)
doc_labels, train_ids, test_ids = read_classification_file(data_home, classification_level=classification_level) # or maingroup
print(f'{len(doc_labels)} documents classified split in {len(train_ids)} train and {len(test_ids)} test documents')
train_request = []
test_request = []
pbar = tqdm([filename for filename in list_files(extracted_path) if filename.endswith(f'-{config}.txt')])
labelcut = LabelCut(classification_level)
errors=0
for proc_file in pbar:
pbar.set_description(f'processing {proc_file} [errors={errors}]')
if not proc_file.endswith(f'-{config}.txt'): continue
lines = open(f'{extracted_path}/{proc_file}', 'rt').readlines()
for lineno,line in enumerate(lines):
parts = line.split('\t')
assert len(parts)==4, f'wrong format in {extracted_path}/{proc_file} line {lineno}'
id,mainlabel,alllabels,text=parts
mainlabel = labelcut.trim(mainlabel)
alllabels = labelcut.trim(alllabels.split())
# assert id in train_ids or id in test_ids, f'id {id} out of scope'
if id not in train_ids and id not in test_ids:
errors+=1
else:
# assert mainlabel == doc_labels[id][0], 'main label not consistent'
request = train_request if id in train_ids else test_request
request.append(WipoGammaDocument(id, text, mainlabel, alllabels))
print('pickling requests for faster subsequent runs')
pickle.dump(train_request, open(join(extracted_path, f'wipo-train-{classification_level}-{config}.pickle'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
pickle.dump(test_request, open(join(extracted_path, f'wipo-test-{classification_level}-{config}.pickle'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
if subset== 'train':
return train_request
else:
return test_request
if __name__=='__main__':
data_home = '../../datasets/WIPO/wipo-gamma/en'
extracted_path = '../../datasets/WIPO-extracted'
train = fetch_WIPOgamma(subset='train', classification_level='subclass', data_home=data_home, extracted_path=extracted_path, text_fields=['abstract'])
test = fetch_WIPOgamma(subset='test', classification_level='subclass', data_home=data_home, extracted_path=extracted_path, text_fields=['abstract'])
# train = fetch_WIPOgamma(subset='train', classification_level='maingroup', data_home=data_home, extracted_path=extracted_path)
# test = fetch_WIPOgamma(subset='test', classification_level='maingroup', data_home=data_home, extracted_path=extracted_path)
print('Done')

118
MultiLabel/gentables.py Normal file

@@ -0,0 +1,118 @@
import argparse
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import itertools
from sklearn.multioutput import ClassifierChain
from tqdm import tqdm
from skmultilearn.dataset import load_dataset, available_data_sets
from scipy.sparse import csr_matrix
import quapy as qp
from MultiLabel.main import load_results, SKMULTILEARN_RED_DATASETS, TC_DATASETS, sample_size
from MultiLabel.mlclassification import MLStackedClassifier
from MultiLabel.mldata import MultilabelledCollection
from MultiLabel.mlquantification import MLNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
MLACC, \
MLPACC, MLNaiveAggregativeQuantifier
from MultiLabel.tabular import Table
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
import numpy as np
from data.dataset import Dataset
from mlevaluation import ml_natural_prevalence_prediction, ml_artificial_prevalence_prediction, check_error_str
import sys
import os
import pickle
models = [#'MLPE',
'NaiveCC', 'NaivePCC', 'NaivePCCcal', 'NaiveACC', 'NaivePACC', 'NaivePACCcal', 'NaiveACCit', 'NaivePACCit',
#'NaiveHDy', 'NaiveSLD',
'ChainCC', 'ChainPCC', 'ChainACC', 'ChainPACC',
'StackCC', 'StackPCC', 'StackPCCcal', 'StackACC', 'StackPACC', 'StackPACCcal', 'StackACCit', 'StackPACCit',
'MRQ-CC', 'MRQ-PCC', 'MRQ-ACC', 'MRQ-PACC', 'MRQ-ACCit', 'MRQ-PACCit',
'StackMRQ-CC', 'StackMRQ-PCC', 'StackMRQ-ACC', 'StackMRQ-PACC',
'MRQ-StackCC', 'MRQ-StackPCC', 'MRQ-StackACC', 'MRQ-StackPACC',
'StackMRQ-StackCC', 'StackMRQ-StackPCC', 'StackMRQ-StackACC', 'StackMRQ-StackPACC',
'MRQ-StackCC-app', 'MRQ-StackPCC-app', 'MRQ-StackACC-app', 'MRQ-StackPACC-app',
'StackMRQ-StackCC-app', 'StackMRQ-StackPCC-app', 'StackMRQ-StackACC-app', 'StackMRQ-StackPACC-app',
'LSP-CC', 'LSP-ACC', 'MLKNN-CC', 'MLKNN-ACC',
'MLAdjustedC', 'MLStackAdjustedC', 'MLprobAdjustedC', 'MLStackProbAdjustedC'
]
# datasets = sorted(set([x[0] for x in available_data_sets().keys()]))
datasets = TC_DATASETS
def generate_table(path, protocol, error):
def compute_score_job(args):
dataset, model = args
result_path = f'{opt.results}/{dataset}_{model}.pkl'
if os.path.exists(result_path):
print('+', end='')
sys.stdout.flush()
result = load_results(result_path)
true_prevs, estim_prevs = result[protocol]
scores = np.asarray([error(trues, estims) for trues, estims in zip(true_prevs, estim_prevs)]).flatten()
return dataset, model, scores
print('-', end='')
sys.stdout.flush()
return None
print(f'\ngenerating {path}')
table = Table(datasets, models, prec_mean=4, significance_test='wilcoxon')
results = qp.util.parallel(compute_score_job, list(itertools.product(datasets, models)), n_jobs=-1)
print()
for r in results:
if r is not None:
dataset, model, scores = r
table.add(dataset, model, scores)
save_table(table, path)
save_table(table.getRankTable(), path.replace('.tex','.rank.tex'))
def save_table(table, path):
tabular = """
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{|c||""" + ('c|' * len(models)) + """} \hline
"""
dataset_replace = {'tmc2007_500': r'tmc2007\_500', 'tmc2007_500-red': r'tmc2007\_500-red'}
method_replace = {}
tabular += table.latexTabularT(benchmark_replace=dataset_replace, method_replace=method_replace, side=True)
tabular += """
\end{tabular}%
}
"""
with open(path, 'wt') as foo:
foo.write(tabular)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Experiments for multi-label quantification')
parser.add_argument('--results', type=str, default='./results', metavar='str',
help=f'path where to store the results')
parser.add_argument('--tablepath', type=str, default='./tables', metavar='str',
help=f'path where to store the tables')
opt = parser.parse_args()
assert os.path.exists(opt.results), f'result directory {opt.results} does not exist'
os.makedirs(opt.tablepath, exist_ok=True)
qp.environ["SAMPLE_SIZE"] = sample_size
absolute_error = qp.error.ae
relative_absolute_error = qp.error.rae
generate_table(f'{opt.tablepath}/npp.ae.tex', protocol='npp', error=absolute_error)
generate_table(f'{opt.tablepath}/app.ae.tex', protocol='app', error=absolute_error)
generate_table(f'{opt.tablepath}/npp.rae.tex', protocol='npp', error=relative_absolute_error)
generate_table(f'{opt.tablepath}/app.rae.tex', protocol='app', error=relative_absolute_error)

290
MultiLabel/main.py Normal file

@@ -0,0 +1,290 @@
import argparse
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import itertools
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import ClassifierChain
from tqdm import tqdm
from skmultilearn.dataset import load_dataset, available_data_sets
from scipy.sparse import csr_matrix
import quapy as qp
from MultiLabel.mlclassification import MLStackedClassifier, LabelSpacePartion, MLTwinSVM, MLknn
from MultiLabel.mldata import MultilabelledCollection
from MultiLabel.mlquantification import MLNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
MLACC, \
MLPACC, MLNaiveAggregativeQuantifier, MLMLPE, StackMLRQuantifier, MLadjustedCount, MLprobAdjustedCount
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
import numpy as np
from data.dataset import Dataset
from mlevaluation import ml_natural_prevalence_prediction, ml_artificial_prevalence_prediction
import sys
import os
import pickle
def cls():
# return LinearSVC()
return LogisticRegression(max_iter=1000, solver='lbfgs')
def calibratedCls():
return CalibratedClassifierCV(cls())
# DEBUG=True
# if DEBUG:
sample_size = 100
n_samples = 5000
SKMULTILEARN_ALL_DATASETS = sorted(set([x[0] for x in available_data_sets().keys()]))
SKMULTILEARN_RED_DATASETS = [x+'-red' for x in SKMULTILEARN_ALL_DATASETS]
TC_DATASETS = ['reuters21578', 'jrcall', 'ohsumed', 'rcv1']
DATASETS = TC_DATASETS
def models():
yield 'MLPE', MLMLPE()
yield 'NaiveCC', MLNaiveAggregativeQuantifier(CC(cls()))
yield 'NaivePCC', MLNaiveAggregativeQuantifier(PCC(cls()))
yield 'NaivePCCcal', MLNaiveAggregativeQuantifier(PCC(calibratedCls()))
yield 'NaiveACC', MLNaiveAggregativeQuantifier(ACC(cls()))
yield 'NaivePACC', MLNaiveAggregativeQuantifier(PACC(cls()))
yield 'NaivePACCcal', MLNaiveAggregativeQuantifier(PACC(calibratedCls()))
yield 'NaiveACCit', MLNaiveAggregativeQuantifier(ACC(cls()))
yield 'NaivePACCit', MLNaiveAggregativeQuantifier(PACC(cls()))
# yield 'NaiveHDy', MLNaiveAggregativeQuantifier(HDy(cls()))
# yield 'NaiveSLD', MLNaiveAggregativeQuantifier(EMQ(calibratedCls()))
yield 'StackCC', MLCC(MLStackedClassifier(cls()))
yield 'StackPCC', MLPCC(MLStackedClassifier(cls()))
yield 'StackPCCcal', MLPCC(MLStackedClassifier(calibratedCls()))
yield 'StackACC', MLACC(MLStackedClassifier(cls()))
yield 'StackPACC', MLPACC(MLStackedClassifier(cls()))
yield 'StackPACCcal', MLPACC(MLStackedClassifier(calibratedCls()))
yield 'StackACCit', MLACC(MLStackedClassifier(cls()))
yield 'StackPACCit', MLPACC(MLStackedClassifier(cls()))
# yield 'ChainCC', MLCC(ClassifierChain(cls(), cv=None))
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None))
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None))
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None))
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
yield 'MRQ-CC', MLRegressionQuantification(MLNaiveQuantifier(CC(cls())), **common)
yield 'MRQ-PCC', MLRegressionQuantification(MLNaiveQuantifier(PCC(cls())), **common)
yield 'MRQ-ACC', MLRegressionQuantification(MLNaiveQuantifier(ACC(cls())), **common)
yield 'MRQ-PACC', MLRegressionQuantification(MLNaiveQuantifier(PACC(cls())), **common)
yield 'MRQ-ACCit', MLRegressionQuantification(MLNaiveQuantifier(ACC(cls())), **common)
yield 'MRQ-PACCit', MLRegressionQuantification(MLNaiveQuantifier(PACC(cls())), **common)
yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MLStackedClassifier(cls())), **common)
yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MLStackedClassifier(cls())), **common)
yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MLStackedClassifier(cls())), **common)
yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MLStackedClassifier(cls())), **common)
yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'StackMRQ-CC', StackMLRQuantifier(MLNaiveQuantifier(CC(cls())), **common)
yield 'StackMRQ-PCC', StackMLRQuantifier(MLNaiveQuantifier(PCC(cls())), **common)
yield 'StackMRQ-ACC', StackMLRQuantifier(MLNaiveQuantifier(ACC(cls())), **common)
yield 'StackMRQ-PACC', StackMLRQuantifier(MLNaiveQuantifier(PACC(cls())), **common)
yield 'StackMRQ-StackCC', StackMLRQuantifier(MLCC(MLStackedClassifier(cls())), **common)
yield 'StackMRQ-StackPCC', StackMLRQuantifier(MLPCC(MLStackedClassifier(cls())), **common)
yield 'StackMRQ-StackACC', StackMLRQuantifier(MLACC(MLStackedClassifier(cls())), **common)
yield 'StackMRQ-StackPACC', StackMLRQuantifier(MLPACC(MLStackedClassifier(cls())), **common)
yield 'StackMRQ-StackCC-app', StackMLRQuantifier(MLCC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'StackMRQ-StackPCC-app', StackMLRQuantifier(MLPCC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'StackMRQ-StackACC-app', StackMLRQuantifier(MLACC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'StackMRQ-StackPACC-app', StackMLRQuantifier(MLPACC(MLStackedClassifier(cls())), protocol='app', **common)
yield 'MLAdjustedC', MLadjustedCount(OneVsRestClassifier(cls()))
yield 'MLStackAdjustedC', MLadjustedCount(MLStackedClassifier(cls()))
# yield 'MLprobAdjustedC', MLprobAdjustedCount(OneVsRestClassifier(calibratedCls()))
# yield 'MLStackProbAdjustedC', MLprobAdjustedCount(MLStackedClassifier(calibratedCls()))
# yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPACC', MLRegressionQuantification(MLPACC(ClassifierChain(cls())), **common)
# yield 'LSP-CC', MLCC(LabelSpacePartion(cls()))
# yield 'LSP-ACC', MLACC(LabelSpacePartion(cls()))
# yield 'TwinSVM-CC', MLCC(MLTwinSVM())
# yield 'TwinSVM-ACC', MLACC(MLTwinSVM())
# yield 'MLKNN-CC', MLCC(MLknn())
#yield 'MLKNN-PCC', MLPCC(MLknn())
# yield 'MLKNN-ACC', MLACC(MLknn())
#yield 'MLKNN-PACC', MLPACC(MLknn())
def get_dataset(dataset_name, dopickle=True):
datadir = f'{qp.util.get_quapy_home()}/pickles'
datapath = f'{datadir}/{dataset_name}.pkl'
if dopickle:
if os.path.exists(datapath):
print(f'returning pickled object in {datapath}')
return pickle.load(open(datapath, 'rb'))
if dataset_name in SKMULTILEARN_ALL_DATASETS + SKMULTILEARN_RED_DATASETS:
clean_name = dataset_name.replace('-red','')
Xtr, ytr, feature_names, label_names = load_dataset(clean_name, 'train')
Xte, yte, _, _ = load_dataset(clean_name, 'test')
print(f'n-labels = {len(label_names)}')
Xtr = csr_matrix(Xtr)
Xte = csr_matrix(Xte)
ytr = ytr.todense().getA()
yte = yte.todense().getA()
if dataset_name.endswith('-red'):
TO_SELECT = 10
nC = ytr.shape[1]
tr_counts = ytr.sum(axis=0)
te_counts = yte.sum(axis=0)
if nC > TO_SELECT:
Y = ytr.T.dot(ytr) # class-class coincidence matrix
Y[np.triu_indices(nC)] = 0 # zeroing all duplicate entries and the diagonal
order_ij = np.argsort(-Y, axis=None)
selected = set()
p=0
while len(selected) < TO_SELECT:
highest_index = order_ij[p]
class_i = highest_index // nC
class_j = highest_index % nC
# if there is only one class to go, then add the most populated one
most_populated, least_populated = (class_i, class_j) if tr_counts[class_i] > tr_counts[class_j] else (class_j, class_i)
if te_counts[most_populated]>0:
selected.add(most_populated)
if len(selected) < TO_SELECT:
if te_counts[least_populated]>0:
selected.add(least_populated)
p+=1
selected = np.asarray(sorted(selected))
ytr = ytr[:,selected]
yte = yte[:, selected]
# else:
# remove categories without positives in the training or test splits
# valid_categories = np.logical_and(ytr.sum(axis=0)>5, yte.sum(axis=0)>5)
# ytr = ytr[:, valid_categories]
# yte = yte[:, valid_categories]
elif dataset_name in TC_DATASETS:
picklepath = '/home/moreo/word-class-embeddings/pickles'
data = Dataset.load(dataset_name, pickle_path=f'{picklepath}/{dataset_name}.pickle')
Xtr, Xte = data.vectorize()
ytr = data.devel_labelmatrix.todense().getA()
yte = data.test_labelmatrix.todense().getA()
# remove categories with < 50 training or test documents
# to_keep = np.logical_and(ytr.sum(axis=0)>=50, yte.sum(axis=0)>=50)
# keep the 10 most populated categories
to_keep = np.argsort(ytr.sum(axis=0))[-10:]
ytr = ytr[:, to_keep]
yte = yte[:, to_keep]
print(f'num categories = {ytr.shape[1]}')
else:
raise ValueError(f'unknown dataset {dataset_name}')
train = MultilabelledCollection(Xtr, ytr)
test = MultilabelledCollection(Xte, yte)
if dopickle:
os.makedirs(datadir, exist_ok=True)
pickle.dump((train, test), open(datapath, 'wb'), pickle.HIGHEST_PROTOCOL)
return train, test
def already_run(result_path):
if os.path.exists(result_path):
print(f'{result_path} already computed. Skipping')
return True
return False
def print_info(train, test):
# print((np.abs(np.corrcoef(ytr, rowvar=False))>0.1).sum())
# sys.exit(0)
print(f'Tr documents {len(train)}')
print(f'Te documents {len(test)}')
print(f'#features {train.instances.shape[1]}')
print(f'#classes {train.labels.shape[1]}')
# print(f'Train-prev: {train.prevalence()[:,1]}')
print(f'Train-counts: {train.counts()}')
# print(f'Test-prev: {test.prevalence()[:,1]}')
print(f'Test-counts: {test.counts()}')
print(f'MLPE: {qp.error.mae(train.prevalence(), test.prevalence()):.5f}')
def save_results(npp_results, app_results, result_path):
# results are lists of tuples of (true_prevs, estim_prevs)
# each true_prevs is an ndarray of ndim=2, but the second dimension is constrained
def _prepare_result_lot(lot_results):
true_prevs, estim_prevs = lot_results
return {
'true_prevs': [true_i[:,0].flatten() for true_i in true_prevs], # removes the constrained prevalence
'estim_prevs': [estim_i[:,0].flatten() for estim_i in estim_prevs] # removes the constrained prevalence
}
results = {
'npp': _prepare_result_lot(npp_results),
'app': _prepare_result_lot(app_results),
}
pickle.dump(results, open(result_path, 'wb'), pickle.HIGHEST_PROTOCOL)
def load_results(result_path):
def _unpack_result_lot(lot_result):
true_prevs = lot_result['true_prevs']
true_prevs = [np.vstack([true_i, 1 - true_i]).T for true_i in true_prevs] # add the constrained prevalence
estim_prevs = lot_result['estim_prevs']
estim_prevs = [np.vstack([estim_i, 1 - estim_i]).T for estim_i in estim_prevs] # add the constrained prevalence
return true_prevs, estim_prevs
results = pickle.load(open(result_path, 'rb'))
results = {
'npp': _unpack_result_lot(results['npp']),
'app': _unpack_result_lot(results['app']),
}
return results
# results_npp = _unpack_result_lot(results['npp'])
# results_app = _unpack_result_lot(results['app'])
# return results_npp, results_app
def run_experiment(dataset_name, model_name, model):
result_path = f'{opt.results}/{dataset_name}_{model_name}.pkl'
if already_run(result_path):
return
print(f'running experiment {dataset_name} x {model_name}')
train, test = get_dataset(dataset_name)
# if train.n_classes>100:
# return
print_info(train, test)
model.fit(train)
results_npp = ml_natural_prevalence_prediction(model, test, sample_size, repeats=100)
results_app = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=11, repeats=5)
save_results(results_npp, results_app, result_path)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Experiments for multi-label quantification')
parser.add_argument('--results', type=str, default='./results', metavar='str',
help=f'path where to store the results')
opt = parser.parse_args()
os.makedirs(opt.results, exist_ok=True)
for datasetname, (modelname,model) in itertools.product(DATASETS, models()):
run_experiment(datasetname, modelname, model)
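# Example invocation (an illustrative sketch; the paths are assumptions):
#
#   python main.py --results ./results
#
# Results are pickled as <results>/<dataset>_<model>.pkl; since already_run()
# skips existing pickles, the loop over (dataset, model) pairs can be resumed
# after an interruption.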

110
MultiLabel/mlclassification.py Normal file
View File

@ -0,0 +1,110 @@
from copy import deepcopy
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import StandardScaler
from skmultilearn.adapt import MLTSVM
from skmultilearn.ensemble import LabelSpacePartitioningClassifier
from skmultilearn.problem_transform import LabelPowerset
from skmultilearn.cluster import NetworkXLabelGraphClusterer, LabelCooccurrenceGraphBuilder
from skmultilearn.embedding import SKLearnEmbedder, EmbeddingClassifier
from sklearn.manifold import SpectralEmbedding
from sklearn.ensemble import RandomForestRegressor
from skmultilearn.adapt import MLkNN
class MLStackedClassifier: # aka Funnelling Monolingual
def __init__(self, base_estimator=LogisticRegression()):
if not hasattr(base_estimator, 'predict_proba'):
print('the estimator does not seem to be probabilistic: calibrating')
base_estimator = CalibratedClassifierCV(base_estimator)
self.base = deepcopy(OneVsRestClassifier(base_estimator))
self.meta = deepcopy(OneVsRestClassifier(base_estimator))
self.norm = StandardScaler()
def fit(self, X, y):
assert y.ndim==2, 'the dataset does not seem to be multi-label'
self.base.fit(X, y)
P = self.base.predict_proba(X)
P = self.norm.fit_transform(P)
self.meta.fit(P, y)
return self
def predict(self, X):
P = self.base.predict_proba(X)
P = self.norm.transform(P)
return self.meta.predict(P)
def predict_proba(self, X):
P = self.base.predict_proba(X)
P = self.norm.transform(P)
return self.meta.predict_proba(P)
class MLStackedRegressor:
def __init__(self, base_regressor=Ridge(normalize=True)):
self.base = deepcopy(base_regressor)
self.meta = deepcopy(base_regressor)
def fit(self, X, y):
assert y.ndim==2, 'the dataset does not seem to be multi-label'
self.base.fit(X, y)
R = self.base.predict(X)
# R = self.norm.fit_transform(R)
self.meta.fit(R, y)
return self
def predict(self, X):
R = self.base.predict(X)
# R = self.norm.transform(R)
return self.meta.predict(R)
class LabelSpacePartion:
def __init__(self, base_estimator=LogisticRegression()):
graph_builder = LabelCooccurrenceGraphBuilder(weighted=True, include_self_edges=False)
self.classifier = LabelSpacePartitioningClassifier(
classifier=LabelPowerset(classifier=base_estimator),
clusterer=NetworkXLabelGraphClusterer(graph_builder, method='louvain')
)
def fit(self, X, y):
return self.classifier.fit(X, y)
def predict(self, X):
return self.classifier.predict(X).todense().getA()
class MLTwinSVM:
def __init__(self):
self.classifier = MLTSVM()
def fit(self, X, y):
return self.classifier.fit(X, y)
def predict(self, X):
return self.classifier.predict(X).todense().getA()
class MLknn:
#http://scikit.ml/api/skmultilearn.embedding.classifier.html#skmultilearn.embedding.EmbeddingClassifier
#notes: need to install package openne
def __init__(self):
self.classifier = EmbeddingClassifier(
SKLearnEmbedder(SpectralEmbedding(n_components=10)),
RandomForestRegressor(n_estimators=10),
MLkNN(k=5)
)
def fit(self, X, y):
return self.classifier.fit(X, y)
def predict(self, X):
return self.classifier.predict(X).todense().getA()
def predict_proba(self, X):
return self.classifier.predict_proba(X)
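if __name__ == '__main__':
    # Minimal smoke test (an illustrative sketch, not part of the original
    # module): exercises MLStackedClassifier on synthetic multi-label data;
    # the data generator and the sizes are arbitrary assumptions.
    from sklearn.datasets import make_multilabel_classification

    X, y = make_multilabel_classification(n_samples=200, n_features=20, n_classes=5, random_state=0)
    stacked = MLStackedClassifier(LogisticRegression(max_iter=1000))
    stacked.fit(X, y)
    P = stacked.predict_proba(X)
    print(f'posteriors shape: {P.shape}')  # expected: (200, 5), one column per class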

209
MultiLabel/mldata.py Normal file
View File

@ -0,0 +1,209 @@
from typing import List, Union
import numpy as np
from scipy.sparse import issparse, vstack
from sklearn.model_selection import train_test_split
from quapy.data import LabelledCollection
from quapy.functional import artificial_prevalence_sampling
from skmultilearn.model_selection import iterative_train_test_split
class MultilabelledCollection:
def __init__(self, instances, labels):
assert labels.ndim==2, f'data does not seem to be multilabel {labels}'
self.instances = instances
self.labels = labels
self.classes_ = np.arange(labels.shape[1])
@classmethod
def load(cls, path: str, loader_func: callable):
return MultilabelledCollection(*loader_func(path))
def __len__(self):
return self.instances.shape[0]
def prevalence(self):
# return self.labels.mean(axis=0)
pos = self.labels.mean(axis=0)
neg = 1-pos
return np.asarray([neg, pos]).T
def counts(self):
return self.labels.sum(axis=0)
@property
def n_classes(self):
return len(self.classes_)
@property
def n_features(self):
return self.instances.shape[1]
@property
def binary(self):
return False
def __gen_index(self):
return np.arange(len(self))
def sampling_multi_index(self, size, cat, prev=None):
if prev is None: # no prevalence was indicated; returns an index for uniform sampling
return np.random.choice(len(self), size, replace=size > len(self))
aux = LabelledCollection(self.__gen_index(), self.labels[:, cat])
return aux.sampling_index(size, *[1-prev, prev])
def uniform_sampling_multi_index(self, size):
return np.random.choice(len(self), size, replace=size>len(self))
def uniform_sampling(self, size):
unif_index = self.uniform_sampling_multi_index(size)
return self.sampling_from_index(unif_index)
def sampling(self, size, category, prev=None):
prev_index = self.sampling_multi_index(size, category, prev)
return self.sampling_from_index(prev_index)
def sampling_from_index(self, index):
documents = self.instances[index]
labels = self.labels[index]
return MultilabelledCollection(documents, labels)
def train_test_split(self, train_prop=0.6, random_state=None, iterative=False):
if iterative:
tr_docs, tr_labels, te_docs, te_labels = \
iterative_train_test_split(self.instances, self.labels, test_size=1-train_prop)
else:
tr_docs, te_docs, tr_labels, te_labels = \
train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state)
return MultilabelledCollection(tr_docs, tr_labels), MultilabelledCollection(te_docs, te_labels)
def artificial_sampling_generator(self, sample_size, category, n_prevalences=101, repeats=1):
dimensions = 2
for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats).flatten():
yield self.sampling(sample_size, category, prevs)
def artificial_sampling_index_generator(self, sample_size, category, n_prevalences=101, repeats=1):
dimensions = 2
for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats).flatten():
yield self.sampling_multi_index(sample_size, category, prevs)
def natural_sampling_generator(self, sample_size, repeats=100):
for _ in range(repeats):
yield self.uniform_sampling(sample_size)
def natural_sampling_index_generator(self, sample_size, repeats=100):
for _ in range(repeats):
yield self.uniform_sampling_multi_index(sample_size)
def asLabelledCollection(self, category):
return LabelledCollection(self.instances, self.labels[:,category])
def genLabelledCollections(self):
for c in self.classes_:
yield self.asLabelledCollection(c)
# @property
# def label_cardinality(self):
# return self.labels.sum()/len(self)
@property
def Xy(self):
return self.instances, self.labels
class MultilingualLabelledCollection:
def __init__(self, langs:List[str], labelledCollections:List[Union[LabelledCollection, MultilabelledCollection]]):
assert len(langs) == len(labelledCollections), 'length mismatch for langs and labelledCollection lists'
assert all(isinstance(lc, (LabelledCollection, MultilabelledCollection)) for lc in labelledCollections), \
'unexpected type for labelledCollections'
assert all(np.array_equal(labelledCollections[0].classes_, lc_i.classes_) for lc_i in labelledCollections[1:]), \
'inconsistent classes found for some labelled collections'
self.llc = {l: lc for l, lc in zip(langs, labelledCollections)}
self.classes_=labelledCollections[0].classes_
@classmethod
def fromLangDict(cls, lang_labelledCollection:dict):
return MultilingualLabelledCollection(*list(zip(*list(lang_labelledCollection.items()))))
def langs(self):
return list(sorted(self.llc.keys()))
def __getitem__(self, lang)->LabelledCollection:
return self.llc[lang]
@classmethod
def load(cls, path: str, loader_func: callable):
return MultilingualLabelledCollection(*loader_func(path))
def __len__(self):
return sum(map(len, self.llc.values()))
def prevalence(self):
prev = np.asarray([lc.prevalence() * len(lc) for lc in self.llc.values()]).sum(axis=0)
return prev / prev.sum()
def language_prevalence(self):
lang_count = np.asarray([len(self.llc[l]) for l in self.langs()])
return lang_count / lang_count.sum()
def counts(self):
return np.asarray([lc.counts() for lc in self.llc.values()]).sum(axis=0)
@property
def n_classes(self):
return len(self.classes_)
@property
def binary(self):
return self.n_classes == 2
def __check_langs(self, l_dict:dict):
assert len(l_dict)==len(self.langs()), 'wrong number of languages'
assert all(l in l_dict for l in self.langs()), 'missing languages in l_dict'
def __check_sizes(self, l_sizes: Union[int,dict]):
assert isinstance(l_sizes, int) or isinstance(l_sizes, dict), 'unexpected type for l_sizes'
if isinstance(l_sizes, int):
return {l:l_sizes for l in self.langs()}
self.__check_langs(l_sizes)
return l_sizes
def sampling_index(self, l_sizes: Union[int,dict], *prevs, shuffle=True):
l_sizes = self.__check_sizes(l_sizes)
return {l:lc.sampling_index(l_sizes[l], *prevs, shuffle=shuffle) for l,lc in self.llc.items()}
def uniform_sampling_index(self, l_sizes: Union[int, dict]):
l_sizes = self.__check_sizes(l_sizes)
return {l: lc.uniform_sampling_index(l_sizes[l]) for l,lc in self.llc.items()}
def uniform_sampling(self, l_sizes: Union[int, dict]):
l_sizes = self.__check_sizes(l_sizes)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.uniform_sampling(l_sizes[l]) for l,lc in self.llc.items()}
)
def sampling(self, l_sizes: Union[int, dict], *prevs, shuffle=True):
l_sizes = self.__check_sizes(l_sizes)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.sampling(l_sizes[l], *prevs, shuffle=shuffle) for l,lc in self.llc.items()}
)
def sampling_from_index(self, l_index:dict):
self.__check_langs(l_index)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.sampling_from_index(l_index[l]) for l,lc in self.llc.items()}
)
def split_stratified(self, train_prop=0.6, random_state=None):
train, test = list(zip(*[self[l].split_stratified(train_prop, random_state) for l in self.langs()]))
return MultilingualLabelledCollection(self.langs(), train), MultilingualLabelledCollection(self.langs(), test)
def asLabelledCollection(self, return_langs=False):
lXy_list = [([l]*len(lc),*lc.Xy) for l, lc in self.llc.items()] # a list with (lang_i, Xi, yi)
ls,Xs,ys = list(zip(*lXy_list))
ls = np.concatenate(ls)
vertstack = vstack if issparse(Xs[0]) else np.vstack
Xs = vertstack(Xs)
ys = np.concatenate(ys)
lc = LabelledCollection(Xs, ys, classes_=self.classes_)
return (lc, ls) if return_langs else lc
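if __name__ == '__main__':
    # Illustrative sketch (not part of the original module): shows the
    # multi-label sampling API on random data; sizes are arbitrary assumptions.
    X = np.random.rand(100, 10)
    y = (np.random.rand(100, 4) > 0.7).astype(int)
    data = MultilabelledCollection(X, y)
    print(data.prevalence().shape)  # (4, 2): one [neg, pos] row per class
    sample = data.sampling(20, category=0, prev=0.5)  # APP-style draw for class 0
    print(len(sample), sample.prevalence()[0])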

117
MultiLabel/mlevaluation.py Normal file
View File

@ -0,0 +1,117 @@
from typing import Union, Callable
import numpy as np
import quapy as qp
from MultiLabel.mlquantification import MLAggregativeQuantifier
from mldata import MultilabelledCollection
import itertools
from tqdm import tqdm
def check_error_str(error_metric):
if isinstance(error_metric, str):
error_metric = qp.error.from_name(error_metric)
assert hasattr(error_metric, '__call__'), 'invalid error function'
return error_metric
def _ml_prevalence_predictions(model,
test: MultilabelledCollection,
test_indexes):
predict_batch_fn = _predict_quantification_batch
if isinstance(model, MLAggregativeQuantifier):
test = MultilabelledCollection(model.preclassify(test.instances), test.labels)
predict_batch_fn = _predict_aggregative_batch
args = tuple([model, test, test_indexes])
true_prevs, estim_prevs = predict_batch_fn(args)
return true_prevs, estim_prevs
def ml_natural_prevalence_prediction(model,
test:MultilabelledCollection,
sample_size,
repeats=100,
random_seed=42):
with qp.util.temp_seed(random_seed):
test_indexes = list(test.natural_sampling_index_generator(sample_size=sample_size, repeats=repeats))
return _ml_prevalence_predictions(model, test, test_indexes)
def ml_natural_prevalence_evaluation(model,
test:MultilabelledCollection,
sample_size,
repeats=100,
error_metric:Union[str,Callable]='mae',
random_seed=42):
error_metric = check_error_str(error_metric)
true_prevs, estim_prevs = ml_natural_prevalence_prediction(model, test, sample_size, repeats, random_seed)
errs = [error_metric(true_prev_i, estim_prev_i) for true_prev_i, estim_prev_i in zip(true_prevs, estim_prevs)]
return np.mean(errs)
def ml_artificial_prevalence_prediction(model,
test:MultilabelledCollection,
sample_size,
n_prevalences=21,
repeats=10,
random_seed=42):
nested_test_indexes = []
with qp.util.temp_seed(random_seed):
for cat in test.classes_:
nested_test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
category=cat,
n_prevalences=n_prevalences,
repeats=repeats)))
def _predict_batch(test_indexes):
return _ml_prevalence_predictions(model, test, test_indexes)
predictions = qp.util.parallel(_predict_batch, nested_test_indexes, n_jobs=-1)
true_prevs = list(itertools.chain.from_iterable(trues for trues, estims in predictions))
estim_prevs = list(itertools.chain.from_iterable(estims for trues, estims in predictions))
return true_prevs, estim_prevs
def ml_artificial_prevalence_evaluation(model,
test:MultilabelledCollection,
sample_size,
n_prevalences=21,
repeats=10,
error_metric:Union[str,Callable]='mae',
random_seed=42):
error_metric = check_error_str(error_metric)
true_prevs, estim_prevs = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences, repeats, random_seed)
errs = [error_metric(true_prev_i, estim_prev_i) for true_prev_i, estim_prev_i in zip(true_prevs, estim_prevs)]
return np.mean(errs)
def _predict_quantification_batch(args):
model, test, indexes = args
return __predict_batch_fn(args, model.quantify)
def _predict_aggregative_batch(args):
model, test, indexes = args
return __predict_batch_fn(args, model.aggregate)
def __predict_batch_fn(args, quant_fn):
model, test, indexes = args
trues, estims = [], []
for index in indexes:
sample = test.sampling_from_index(index)
estims.append(quant_fn(sample.instances))
trues.append(sample.prevalence())
return trues, estims
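if __name__ == '__main__':
    # Illustrative end-to-end sketch (not part of the original module): runs
    # the NPP evaluation on random data, so the reported error is only a smoke
    # test of the protocol code, not a meaningful result.
    from sklearn.linear_model import LogisticRegression
    from MultiLabel.mlquantification import MLNaiveAggregativeQuantifier
    from method.aggregative import CC

    X = np.random.rand(500, 10)
    y = (np.random.rand(500, 3) > 0.5).astype(int)
    train = MultilabelledCollection(X[:300], y[:300])
    test = MultilabelledCollection(X[300:], y[300:])
    model = MLNaiveAggregativeQuantifier(CC(LogisticRegression())).fit(train)
    print(ml_natural_prevalence_evaluation(model, test, sample_size=50, repeats=10))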

361
MultiLabel/mlquantification.py Normal file
View File

@ -0,0 +1,361 @@
import numpy as np
from copy import deepcopy
import sklearn.preprocessing
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import confusion_matrix
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
from sklearn.linear_model import LogisticRegression, Ridge, Lasso, LassoCV, MultiTaskLassoCV, LassoLars, LassoLarsCV, \
ElasticNet, MultiTaskElasticNetCV, MultiTaskElasticNet, LinearRegression, ARDRegression, BayesianRidge, SGDRegressor
import quapy as qp
from MultiLabel.mlclassification import MLStackedClassifier, MLStackedRegressor
from MultiLabel.mldata import MultilabelledCollection
from method.aggregative import CC, ACC, PACC, AggregativeQuantifier
from method.base import BaseQuantifier
from abc import abstractmethod
class MLQuantifier:
@abstractmethod
def fit(self, data: MultilabelledCollection): ...
@abstractmethod
def quantify(self, instances): ...
class MLMLPE(MLQuantifier):
def fit(self, data: MultilabelledCollection):
self.tr_prev = data.prevalence()
return self
def quantify(self, instances):
return self.tr_prev
class MLAggregativeQuantifier(MLQuantifier):
def __init__(self, mlcls):
self.learner = mlcls
def fit(self, data:MultilabelledCollection):
self.learner.fit(*data.Xy)
return self
@abstractmethod
def preclassify(self, instances): ...
@abstractmethod
def aggregate(self, predictions): ...
def quantify(self, instances):
predictions = self.preclassify(instances)
return self.aggregate(predictions)
class MLCC(MLAggregativeQuantifier):
def preclassify(self, instances):
return self.learner.predict(instances)
def aggregate(self, predictions):
pos_prev = predictions.mean(axis=0)
neg_prev = 1 - pos_prev
return np.asarray([neg_prev, pos_prev]).T
class MLPCC(MLCC):
def preclassify(self, instances):
return self.learner.predict_proba(instances)
class MLACC(MLCC):
def fit(self, data:MultilabelledCollection, train_prop=0.6):
self.classes_ = data.classes_
train, val = data.train_test_split(train_prop=train_prop)
self.learner.fit(*train.Xy)
val_predictions = self.preclassify(val.instances)
self.Pte_cond_estim_ = []
for c in data.classes_:
pos_c = val.labels[:,c].sum()
neg_c = len(val) - pos_c
self.Pte_cond_estim_.append(confusion_matrix(val.labels[:,c], val_predictions[:,c]).T / np.array([neg_c, pos_c]))
return self
def preclassify(self, instances):
return self.learner.predict(instances)
def aggregate(self, predictions):
cc_prevs = super(MLACC, self).aggregate(predictions)
acc_prevs = np.asarray([ACC.solve_adjustment(self.Pte_cond_estim_[c], cc_prevs[c]) for c in self.classes_])
return acc_prevs
class MLPACC(MLPCC):
def fit(self, data:MultilabelledCollection, train_prop=0.6):
self.classes_ = data.classes_
train, val = data.train_test_split(train_prop=train_prop)
self.learner.fit(*train.Xy)
val_posteriors = self.preclassify(val.instances)
self.Pte_cond_estim_ = []
for c in data.classes_:
pos_posteriors = val_posteriors[:,c]
c_posteriors = np.asarray([1-pos_posteriors, pos_posteriors]).T
self.Pte_cond_estim_.append(PACC.getPteCondEstim([0,1], val.labels[:,c], c_posteriors))
return self
def aggregate(self, posteriors):
pcc_prevs = super(MLPACC, self).aggregate(posteriors)
pacc_prevs = np.asarray([ACC.solve_adjustment(self.Pte_cond_estim_[c], pcc_prevs[c]) for c in self.classes_])
return pacc_prevs
class MLNaiveQuantifier(MLQuantifier):
def __init__(self, q:BaseQuantifier, n_jobs=-1):
self.q = q
self.estimators = None
self.n_jobs = n_jobs
def fit(self, data:MultilabelledCollection):
self.classes_ = data.classes_
def cat_job(lc):
return deepcopy(self.q).fit(lc)
self.estimators = qp.util.parallel(cat_job, data.genLabelledCollections(), n_jobs=self.n_jobs)
return self
def quantify(self, instances):
pos_prevs = np.zeros(len(self.classes_), dtype=float)
for c in self.classes_:
pos_prevs[c] = self.estimators[c].quantify(instances)[1]
neg_prevs = 1-pos_prevs
return np.asarray([neg_prevs, pos_prevs]).T
class MLNaiveAggregativeQuantifier(MLNaiveQuantifier, MLAggregativeQuantifier):
def __init__(self, q:AggregativeQuantifier, n_jobs=-1):
assert isinstance(q, AggregativeQuantifier), 'the quantifier is not of type aggregative!'
self.q = q
self.estimators = None
self.n_jobs = n_jobs
def preclassify(self, instances):
return np.asarray([q.preclassify(instances) for q in self.estimators]).swapaxes(0,1)
def aggregate(self, predictions):
pos_prevs = np.zeros(len(self.classes_), dtype=float)
for c in self.classes_:
pos_prevs[c] = self.estimators[c].aggregate(predictions[:,c])[1]
neg_prevs = 1 - pos_prevs
return np.asarray([neg_prevs, pos_prevs]).T
def quantify(self, instances):
predictions = self.preclassify(instances)
return self.aggregate(predictions)
class MLRegressionQuantification:
def __init__(self,
mlquantifier=MLNaiveQuantifier(CC(LinearSVC())),
regression='ridge',
protocol='npp',
n_samples=500,
sample_size=500,
norm=True,
means=True,
stds=True):
assert protocol in ['npp', 'app'], 'unknown protocol'
self.estimator = mlquantifier
if isinstance(regression, str):
assert regression in ['ridge', 'svr'], 'unknown regression model'
if regression == 'ridge':
self.reg = Ridge(normalize=norm)
elif regression == 'svr':
self.reg = MultiOutputRegressor(LinearSVR())
else:
self.reg = regression
self.protocol = protocol
# self.reg = MultiTaskLassoCV(normalize=norm)
# self.reg = KernelRidge(kernel='rbf')
# self.reg = LassoLarsCV(normalize=norm)
# self.reg = MultiTaskElasticNetCV(normalize=norm) <- good
# self.reg = LinearRegression(normalize=norm) # <- good
# self.reg = MultiOutputRegressor(ARDRegression(normalize=norm)) # <- quite good, even without norm
# self.reg = MultiOutputRegressor(BayesianRidge(normalize=False)) # <- quite good, even without norm
# self.reg = MultiOutputRegressor(SGDRegressor()) # slow, does not work
self.regression = regression
self.n_samples = n_samples
self.sample_size = sample_size
# self.norm = StandardScaler()
self.means = means
self.stds = stds
# self.covs = covs
def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
Xs = np.asarray(Xs)
ys = np.asarray(ys)
if self.means:
samples_mean = np.asarray(samples_mean)
Xs = np.hstack([Xs, samples_mean])
if self.stds:
samples_std = np.asarray(samples_std)
Xs = np.hstack([Xs, samples_std])
# if self.covs:
return Xs, ys
def _extract_features(self, sample, Xs, ys, samples_mean, samples_std):
ys.append(sample.prevalence()[:, 1])
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
if self.means:
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
if self.stds:
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
def generate_samples_npp(self, val):
Xs, ys = [], []
samples_mean, samples_std = [], []
for sample in val.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
self._extract_features(sample, Xs, ys, samples_mean, samples_std)
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
def generate_samples_app(self, val):
Xs, ys = [], []
samples_mean, samples_std = [], []
ncats = len(self.classes_)
nprevs = 21
repeats = max(self.n_samples // (ncats * nprevs), 1)
for cat in self.classes_:
for sample in val.artificial_sampling_generator(sample_size=self.sample_size, category=cat, n_prevalences=nprevs, repeats=repeats):
self._extract_features(sample, Xs, ys, samples_mean, samples_std)
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
def fit(self, data:MultilabelledCollection):
self.classes_ = data.classes_
tr, val = data.train_test_split()
self.estimator.fit(tr)
if self.protocol == 'npp':
Xs, ys = self.generate_samples_npp(val)
elif self.protocol == 'app':
Xs, ys = self.generate_samples_app(val)
# Xs = self.norm.fit_transform(Xs)
self.reg.fit(Xs, ys)
return self
def quantify(self, instances):
Xs = self.estimator.quantify(instances)[:,1].reshape(1,-1)
if self.means:
sample_mean = instances.mean(axis=0).getA()
Xs = np.hstack([Xs, sample_mean])
if self.stds:
sample_std = instances.todense().std(axis=0).getA()
Xs = np.hstack([Xs, sample_std])
# Xs = self.norm.transform(Xs)
Xs = self.reg.predict(Xs)
# Xs = self.norm.inverse_transform(Xs)
adjusted = np.clip(Xs, 0, 1)
adjusted = adjusted.flatten()
neg_prevs = 1-adjusted
return np.asarray([neg_prevs, adjusted]).T
class StackMLRQuantifier:
def __init__(self,
mlquantifier=MLNaiveQuantifier(CC(LinearSVC())),
regression='ridge',
protocol='npp',
n_samples=500,
sample_size=500,
norm=True,
means=True,
stds=True):
if regression == 'ridge':
reg = MLStackedRegressor(Ridge(normalize=True))
elif regression == 'svr':
reg = MLStackedRegressor(MultiOutputRegressor(LinearSVR()))
else:
raise ValueError(f'unknown regressor {regression}')
self.base = MLRegressionQuantification(
mlquantifier=mlquantifier,
regression=reg,
protocol=protocol,
n_samples=n_samples,
sample_size=sample_size,
norm=norm,
means=means,
stds=stds)
def fit(self, data:MultilabelledCollection):
self.classes_ = data.classes_
self.base.fit(data)
return self
def quantify(self, instances):
return self.base.quantify(instances)
class MLadjustedCount(MLAggregativeQuantifier):
def __init__(self, learner):
self.learner = learner
def preclassify(self, instances):
return self.learner.predict(instances)
def fit(self, data: MultilabelledCollection, train_prop=0.6):
self.classes_ = data.classes_
train, val = data.train_test_split(train_prop=train_prop)
self.learner.fit(*train.Xy)
val_predictions = self.preclassify(val.instances)
val_true = val.labels
N = len(val)
C = val_predictions.T.dot(val_true) / N # joint probabilities [[P(y1,\hat{y}1), P(y2,\hat{y}1)], ... ]
priorP = val_predictions.mean(axis=0).reshape(-1,1) # priors [P(hat{y}1), P(hat{y}2), ...]
self.Pte_cond_estim_ = np.true_divide(C, priorP, where=priorP>0) # cond probabilities [[P(y1|\hat{y}1), P(y2|\hat{y}1)], ... ]
return self
def aggregate(self, predictions):
P = sklearn.preprocessing.normalize(predictions, norm='l1')
correction = P.dot(self.Pte_cond_estim_)
adjusted = correction.mean(axis=0)
return np.asarray([1-adjusted, adjusted]).T
class MLprobAdjustedCount(MLAggregativeQuantifier):
def __init__(self, learner):
self.learner = learner
def preclassify(self, instances):
return self.learner.predict_proba(instances)
def fit(self, data: MultilabelledCollection, train_prop=0.6):
self.classes_ = data.classes_
train, val = data.train_test_split(train_prop=train_prop)
self.learner.fit(*train.Xy)
val_predictions = self.preclassify(val.instances)
val_true = val.labels
N = len(val)
C = (val_predictions>0.5).T.dot(val_true) / N # joint probabilities [[P(y1,\hat{y}1), P(y2,\hat{y}1)], ... ]
# not sure...
priorP = val_predictions.mean(axis=0).reshape(-1,1) # priors [P(hat{y}1), P(hat{y}2), ...]
self.Pte_cond_estim_ = np.true_divide(C, priorP, where=priorP>0) # cond probabilities [[P(y1|\hat{y}1), P(y2|\hat{y}1)], ... ]
return self
def aggregate(self, predictions):
P = sklearn.preprocessing.normalize(predictions, norm='l1')
correction = P.dot(self.Pte_cond_estim_)
adjusted = correction.mean(axis=0)
return np.asarray([1-adjusted, adjusted]).T
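if __name__ == '__main__':
    # Worked toy example (illustrative, not part of the original module) of the
    # correction applied by MLadjustedCount.aggregate: l1-normalized predictions
    # are mapped through the estimated matrix of P(true|predicted) and averaged.
    # The matrix M below is an arbitrary stand-in for Pte_cond_estim_.
    M = np.array([[0.9, 0.2],
                  [0.1, 0.8]])
    preds = np.array([[1, 0], [1, 0], [0, 1], [1, 1]], dtype=float)
    P = preds / preds.sum(axis=1, keepdims=True)  # l1 normalization, as in aggregate()
    adjusted = P.dot(M).mean(axis=0)
    print(np.asarray([1 - adjusted, adjusted]).T)  # one [neg, pos] row per class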

View File

@ -0,0 +1,79 @@
num categories = 10
Train-counts: [1650 181 389 2877 433 347 538 197 369 212]
Test-counts: [ 719 56 189 1087 149 131 179 89 117 71]
MLPE: 0.01101
NPP:
NaiveCC mae=0.01718
NaivePCC mae=0.00898
NaiveACC mae=0.01560
NaivePACC mae=0.01062
StackCC mae=0.00790
StackPCC mae=0.00659 **
StackACC mae=0.00913
StackPACC mae=0.00771
ChainCC mae=0.01644
ChainPCC mae=0.00924
ChainACC mae=0.01767
ChainPACC mae=0.01140
MRQ-CC mae=0.01130
MRQ-PCC mae=0.00941
MRQ-ACC mae=0.01153
MRQ-PACC mae=0.01000
MRQ-StackCC mae=0.00757
MRQ-StackPCC mae=0.00652 **
MRQ-StackACC mae=0.00799
MRQ-StackPACC mae=0.00763
MRQ-StackCC-app mae=0.00791
MRQ-StackPCC-app mae=0.00840
MRQ-StackACC-app mae=0.00910
MRQ-StackPACC-app mae=0.00941
MRQ-ChainCC mae=0.00989
MRQ-ChainPCC mae=0.00916
MRQ-ChainACC mae=0.01251
MRQ-ChainPACC mae=0.00954
APP:
NaiveCC mae=0.04120
NaivePCC mae=0.03741
NaiveACC mae=0.03202
NaivePACC mae=0.02293
StackCC mae=0.01969
StackPCC mae=0.01871
StackACC mae=0.01386 **
StackPACC mae=0.01267 **
ChainCC mae=0.04136
ChainPCC mae=0.03571
ChainACC mae=0.03622
ChainPACC mae=0.02659
MRQ-CC mae=0.04356
MRQ-PCC mae=0.02532
MRQ-ACC mae=0.05716
MRQ-PACC mae=0.02936
MRQ-StackCC mae=0.02448
MRQ-StackPCC mae=0.02090
MRQ-StackACC mae=0.02579
MRQ-StackPACC mae=0.02388
MRQ-StackCC-app mae=0.01535
MRQ-StackPCC-app mae=0.01457
MRQ-StackACC-app mae=0.01441
MRQ-StackPACC-app mae=0.01633
MRQ-ChainCC mae=0.04874
MRQ-ChainPCC mae=0.02537
MRQ-ChainACC mae=0.06262
MRQ-ChainPACC mae=0.02906

347
MultiLabel/tabular.py Normal file
View File

@ -0,0 +1,347 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon
class Table:
VALID_TESTS = [None, "wilcoxon", "ttest"]
def __init__(self, benchmarks, methods, lower_is_better=True, significance_test='ttest', prec_mean=3,
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
color=True):
assert significance_test in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
self.benchmarks = np.asarray(benchmarks)
self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}
self.methods = np.asarray(methods)
self.method_index = {col: j for j, col in enumerate(methods)}
self.map = {}
# keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
self._addmap('values', dtype=object)
self.lower_is_better = lower_is_better
self.ttest = significance_test
self.prec_mean = prec_mean
self.clean_zero = clean_zero
self.show_std = show_std
self.prec_std = prec_std
self.add_average = average
self.missing = missing
self.missing_str = missing_str
self.color = color
self.touch()
@property
def nbenchmarks(self):
return len(self.benchmarks)
@property
def nmethods(self):
return len(self.methods)
def touch(self):
self._modif = True
def update(self):
if self._modif:
self.compute()
def _getfilled(self):
return np.argwhere(self.map['fill'])
@property
def values(self):
return self.map['values']
def _indexes(self):
return itertools.product(range(self.nbenchmarks), range(self.nmethods))
def _addmap(self, map, dtype, func=None):
self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
if func is None:
return
m = self.map[map]
f = func
indexes = self._indexes() if map == 'fill' else self._getfilled()
for i, j in indexes:
m[i, j] = f(self.values[i, j])
def _addrank(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
if not self.lower_is_better:
ranked_cols_idx = ranked_cols_idx[::-1]
self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)
def _addcolor(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if filled_cols_idx.size == 0:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
minval = min(col_means)
maxval = max(col_means)
for col_idx in filled_cols_idx:
val = self.map['mean'][i, col_idx]
norm = (maxval - minval)
if norm > 0:
normval = (val - minval) / norm
else:
normval = 0.5
if self.lower_is_better:
normval = 1 - normval
self.map['color'][i, col_idx] = color_red2green_01(normval)
def _run_ttest(self, row, col1, col2):
mean1 = self.map['mean'][row, col1]
std1 = self.map['std'][row, col1]
nobs1 = self.map['nobs'][row, col1]
mean2 = self.map['mean'][row, col2]
std2 = self.map['std'][row, col2]
nobs2 = self.map['nobs'][row, col2]
_, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
return p_val
def _run_wilcoxon(self, row, col1, col2):
values1 = self.map['values'][row, col1]
values2 = self.map['values'][row, col2]
_, p_val = wilcoxon(values1, values2)
return p_val
def _add_statistical_test(self):
if self.ttest is None:
return
self.some_similar = [False] * self.nmethods
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if len(filled_cols_idx) <= 1:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
best_pos = filled_cols_idx[np.argmin(col_means)]
for j in filled_cols_idx:
if j == best_pos:
continue
if self.ttest == 'ttest':
p_val = self._run_ttest(i, best_pos, j)
else:
p_val = self._run_wilcoxon(i, best_pos, j)
pval_outcome = pval_interpretation(p_val)
self.map['ttest'][i, j] = pval_outcome
if pval_outcome != 'Diff':
self.some_similar[j] = True
def compute(self):
self._addmap('fill', dtype=bool, func=lambda x: x is not None)
self._addmap('mean', dtype=float, func=np.mean)
self._addmap('std', dtype=float, func=np.std)
self._addmap('nobs', dtype=float, func=len)
self._addmap('rank', dtype=int, func=None)
self._addmap('color', dtype=object, func=None)
self._addmap('ttest', dtype=object, func=None)
self._addmap('latex', dtype=object, func=None)
self._addrank()
self._addcolor()
self._add_statistical_test()
if self.add_average:
self._addave()
self._modif = False
def _is_column_full(self, col):
return all(self.map['fill'][:, self.method_index[col]])
def _addave(self):
ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, significance_test=self.ttest, average=False,
missing=self.missing, missing_str=self.missing_str, prec_mean=self.prec_mean, prec_std=self.prec_std,
show_std=self.show_std)
for col in self.methods:
values = None
if self._is_column_full(col):
if self.ttest == 'ttest':
values = np.asarray(self.map['mean'][:, self.method_index[col]])
else: # wilcoxon
values = np.concatenate(self.values[:, self.method_index[col]])
ave.add('ave', col, values)
self.average = ave
def add(self, benchmark, method, values):
if values is not None:
values = np.asarray(values)
if values.ndim == 0:
values = values.flatten()
rid, cid = self._coordinates(benchmark, method)
if self.map['values'][rid, cid] is None:
self.map['values'][rid, cid] = values
elif values is not None:
self.map['values'][rid, cid] = np.concatenate([self.map['values'][rid, cid], values])
self.touch()
def get(self, benchmark, method, attr='mean'):
self.update()
assert attr in self.map, f'unknown attribute {attr}'
rid, cid = self._coordinates(benchmark, method)
if self.map['fill'][rid, cid]:
v = self.map[attr][rid, cid]
if v is None or (isinstance(v, float) and np.isnan(v)):
return self.missing
return v
else:
return self.missing
def _coordinates(self, benchmark, method):
assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
assert method in self.method_index, f'method {method} out of range'
rid = self.benchmark_index[benchmark]
cid = self.method_index[method]
return rid, cid
def get_average(self, method, attr='mean'):
self.update()
if self.add_average:
return self.average.get('ave', method, attr=attr)
return None
def get_color(self, benchmark, method):
color = self.get(benchmark, method, attr='color')
if color is None:
return ''
return color
def latexCell(self, benchmark, method):
self.update()
i, j = self._coordinates(benchmark, method)
if self.map['fill'][i, j] == False:
return self.missing_str
mean = self.map['mean'][i, j]
l = f" {mean:.{self.prec_mean}f}"
if self.clean_zero:
l = l.replace(' 0.', '.')
isbest = self.map['rank'][i, j] == 1
if isbest:
l = "\\textbf{" + l.strip() + "}"
stat = ''
if self.ttest is not None and self.some_similar[j]:
test_label = self.map['ttest'][i, j]
if test_label == 'Sim':
stat = '^{\dag\phantom{\dag}}'
elif test_label == 'Same':
stat = '^{\ddag}'
elif isbest or test_label == 'Diff':
stat = '^{\phantom{\ddag}}'
std = ''
if self.show_std:
std = self.map['std'][i, j]
std = f" {std:.{self.prec_std}f}"
if self.clean_zero:
std = std.replace(' 0.', '.')
std = f" \pm {std:{self.prec_std}}"
if stat != '' or std != '':
l = f'{l}${stat}{std}$'
if self.color:
l += ' ' + self.map['color'][i, j]
return l
def latexTabular(self, benchmark_replace={}, method_replace={}, average=True):
tab = ' & '
tab += ' & '.join([method_replace.get(col, col) for col in self.methods])
tab += ' \\\\\hline\n'
for row in self.benchmarks:
rowname = benchmark_replace.get(row, row)
tab += rowname + ' & '
tab += self.latexRow(row)
if average:
tab += '\hline\n'
tab += 'Average & '
tab += self.latexAverage()
return tab
def latexTabularT(self, benchmark_replace={}, method_replace={}, average=True, side=False):
def withside(label):
return '\side{'+label+'}' if side else label
tab = ' & '
tab += ' & '.join([withside(benchmark_replace.get(col, col)) for col in self.benchmarks])
if average:
tab += ' & ' + withside('Ave')
tab += ' \\\\\hline\n'
for row in self.methods:
rowname = method_replace.get(row, row)
tab += rowname + ' & '
tab += self.latexRowT(row, endl='')
if average:
tab += ' & '
tab += self.average.latexCell('ave', row)
tab += '\\\\\hline\n'
return tab
def latexRow(self, benchmark, endl='\\\\\hline\n'):
s = [self.latexCell(benchmark, col) for col in self.methods]
s = ' & '.join(s)
s += ' ' + endl
return s
def latexRowT(self, method, endl='\\\\\hline\n'):
s = [self.latexCell(benchmark, method) for benchmark in self.benchmarks]
s = ' & '.join(s)
s += ' ' + endl
return s
def latexAverage(self, endl='\\\\\hline\n'):
if self.add_average:
return self.average.latexRow('ave', endl=endl)
def getRankTable(self):
t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
for rid, cid in self._getfilled():
row = self.benchmarks[rid]
col = self.methods[cid]
t.add(row, col, self.get(row, col, 'rank'))
t.compute()
return t
def dropMethods(self, methods):
drop_index = [self.method_index[m] for m in methods]
new_methods = np.delete(self.methods, drop_index)
new_index = {col: j for j, col in enumerate(new_methods)}
self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
self.methods = new_methods
self.method_index = new_index
self.touch()
def pval_interpretation(p_val):
if 0.005 >= p_val:
return 'Diff'
elif 0.05 >= p_val > 0.005:
return 'Sim'
elif p_val > 0.05:
return 'Same'
def color_red2green_01(val, maxtone=50):
if np.isnan(val): return None
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
# rescale to [-1,1]
val = val * 2 - 1
if val < 0:
color = 'red'
tone = maxtone * (-val)
else:
color = 'green'
tone = maxtone * val
return '\cellcolor{' + color + f'!{int(tone)}' + '}'
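if __name__ == '__main__':
    # Minimal usage sketch (not part of the original module): fills a 2x2 table
    # with random per-sample errors and prints the LaTeX tabular; the benchmark
    # and method names are placeholders.
    rng = np.random.RandomState(0)
    table = Table(benchmarks=['d1', 'd2'], methods=['m1', 'm2'], significance_test='wilcoxon', prec_mean=4)
    for b in ['d1', 'd2']:
        for m in ['m1', 'm2']:
            table.add(b, m, rng.rand(50))
    print(table.latexTabular())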

0
MultiLabel/util/__init__.py Executable file
View File

145
MultiLabel/util/common.py Executable file
View File

@ -0,0 +1,145 @@
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import numpy as np
from tqdm import tqdm
import torch
from scipy.sparse import vstack, issparse
from joblib import Parallel, delayed
import multiprocessing
import itertools
def index(data, vocab, known_words, analyzer, unk_index, out_of_vocabulary):
"""
Index (i.e., replaces word strings with numerical indexes) a list of string documents
:param data: list of string documents
:param vocab: a fixed mapping [str]->[int] of words to indexes
:param known_words: a set of known words (e.g., words that, despite not being included in the vocab, can be retained
because they are anyway contained in a pre-trained embedding set that we know in advance)
:param analyzer: the preprocessor in charge of transforming the document string into a chain of string words
:param unk_index: the index of the 'unknown token', i.e., a symbol that characterizes all words that we cannot keep
:param out_of_vocabulary: an incremental mapping [str]->[int] of words to indexes that will index all those words that
are not in the original vocab but that are in the known_words
:return:
"""
indexes=[]
vocabsize = len(vocab)
unk_count = 0
knw_count = 0
out_count = 0
pbar = tqdm(data, desc=f'indexing documents')
for text in pbar:
words = analyzer(text)
index = []
for word in words:
if word in vocab:
idx = vocab[word]
else:
if word in known_words:
if word not in out_of_vocabulary:
out_of_vocabulary[word] = vocabsize+len(out_of_vocabulary)
idx = out_of_vocabulary[word]
out_count += 1
else:
idx = unk_index
unk_count += 1
index.append(idx)
indexes.append(index)
knw_count += len(index)
pbar.set_description(f'[unk = {unk_count}/{knw_count}={(100.*unk_count/knw_count):.2f}%]'
f'[out = {out_count}/{knw_count}={(100.*out_count/knw_count):.2f}%]')
return indexes
def define_pad_length(index_list):
lengths = [len(index) for index in index_list]
return int(np.mean(lengths)+np.std(lengths))
def pad(index_list, pad_index, max_pad_length=None):
pad_length = np.max([len(index) for index in index_list])
if max_pad_length is not None:
pad_length = min(pad_length, max_pad_length)
for i,indexes in enumerate(index_list):
index_list[i] = [pad_index]*(pad_length-len(indexes)) + indexes[:pad_length]
return index_list
def get_word_list(word2index1, word2index2=None): #TODO: redo
def extract_word_list(word2index):
return [w for w,i in sorted(word2index.items(), key=lambda x: x[1])]
word_list = extract_word_list(word2index1)
if word2index2 is not None:
word_list += extract_word_list(word2index2)
return word_list
def batchify(index_list, labels, batchsize, pad_index, device, target_long=False, max_pad_length=500):
nsamples = len(index_list)
nbatches = nsamples // batchsize + 1*(nsamples%batchsize>0)
for b in range(nbatches):
batch = index_list[b*batchsize:(b+1)*batchsize]
batch_labels = labels[b*batchsize:(b+1)*batchsize]
if issparse(batch_labels):
batch_labels = batch_labels.toarray()
batch = pad(batch, pad_index=pad_index, max_pad_length=max_pad_length)
batch = torch.LongTensor(batch)
totype = torch.LongTensor if target_long else torch.FloatTensor
target = totype(batch_labels)
yield batch.to(device), target.to(device)
def batchify_unlabelled(index_list, batchsize, pad_index, device, max_pad_length=500):
nsamples = len(index_list)
nbatches = nsamples // batchsize + 1*(nsamples%batchsize>0)
for b in range(nbatches):
batch = index_list[b*batchsize:(b+1)*batchsize]
batch = pad(batch, pad_index=pad_index, max_pad_length=max_pad_length)
batch = torch.LongTensor(batch)
yield batch.to(device)
def clip_gradient(model, clip_value=1e-1):
params = list(filter(lambda p: p.grad is not None, model.parameters()))
for p in params:
p.grad.data.clamp_(-clip_value, clip_value)
def predict(logits, classification_type='singlelabel'):
if classification_type == 'multilabel':
prediction = torch.sigmoid(logits) > 0.5
elif classification_type == 'singlelabel':
prediction = torch.argmax(logits, dim=1).view(-1, 1)
else:
raise ValueError(f'unknown classification type {classification_type}')
return prediction.detach().cpu().numpy()
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
def get_parallel_slices(n_tasks, n_jobs=-1):
if n_jobs==-1:
n_jobs = multiprocessing.cpu_count()
batch = int(n_tasks / n_jobs)
remainder = n_tasks % n_jobs
return [slice(job*batch, (job+1)*batch+ (remainder if job == n_jobs - 1 else 0)) for job in range(n_jobs)]
def tokenize_job(documents, tokenizer, max_tokens, job):
return [tokenizer(d)[:max_tokens] for d in tqdm(documents, desc=f'tokenizing [job: {job}]')]
def tokenize_parallel(documents, tokenizer, max_tokens, n_jobs=-1):
slices = get_parallel_slices(n_tasks=len(documents), n_jobs=n_jobs)
tokens = Parallel(n_jobs=n_jobs)(
delayed(tokenize_job)(
documents[slice_i], tokenizer, max_tokens, job
)
for job, slice_i in enumerate(slices)
)
return list(itertools.chain.from_iterable(tokens))
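if __name__ == '__main__':
    # Illustrative sketch (not part of the original module): pads three indexed
    # documents to a common length; pad() prepends the pad symbol.
    docs = [[1, 2, 3], [4], [5, 6]]
    print(define_pad_length(docs))  # mean length plus one std, as an int
    print(pad([list(d) for d in docs], pad_index=0))  # [[1, 2, 3], [0, 0, 4], [0, 5, 6]]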

60
MultiLabel/util/csv_log.py Executable file
View File

@ -0,0 +1,60 @@
import os
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
class CSVLog:
def __init__(self, file, columns=None, autoflush=True, verbose=False, overwrite=False):
self.file = file
self.autoflush = autoflush
self.verbose = verbose
if os.path.exists(file) and not overwrite:
self.tell('Loading existing file from {}'.format(file))
self.df = pd.read_csv(file, sep='\t')
self.columns = sorted(self.df.columns.values.tolist())
else:
self.tell('File {} does not exist or overwrite=True. Creating new frame.'.format(file))
assert columns is not None, 'columns cannot be None'
self.columns = sorted(columns)
dir = os.path.dirname(self.file)
if dir and not os.path.exists(dir): os.makedirs(dir)
self.df = pd.DataFrame(columns=self.columns)
self.defaults = {}
def already_calculated(self, **kwargs):
df = self.df
if df.shape[0] == 0:
return False
if len(kwargs) == 0:
kwargs = self.defaults
for key,val in kwargs.items():
df = df.loc[df[key] == val]
if df.shape[0] == 0:
return False
return True
def set_default(self, param, value):
self.defaults[param] = value
def add_row(self, **kwargs):
for key in self.defaults.keys():
if key not in kwargs:
kwargs[key]=self.defaults[key]
columns = sorted(list(kwargs.keys()))
values = [kwargs[col_i] for col_i in columns]
s = pd.Series(values, index=self.columns)
self.df = self.df.append(s, ignore_index=True)
if self.autoflush: self.flush()
self.tell(kwargs)
def flush(self):
self.df.to_csv(self.file, index=False, sep='\t')
def tell(self, msg):
if self.verbose: print(msg)
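if __name__ == '__main__':
    # Usage sketch (not part of the original module); the file name and column
    # names are arbitrary. Note that DataFrame.append requires pandas < 2.0.
    log = CSVLog('demo_log.csv', columns=['dataset', 'method', 'mae'], overwrite=True, verbose=True)
    log.set_default('dataset', 'toy')
    if not log.already_calculated(dataset='toy', method='CC'):
        log.add_row(method='CC', mae=0.05)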

View File

@ -0,0 +1,33 @@
from data.dataset import Dataset
from tqdm import tqdm
import os
import numpy as np
def write_data(documents, labels, fout):
print(f'there are {len(documents)} documents')
written, empty = 0, 0
with open(fout, 'wt') as foo:
for doc, label in tqdm(list(zip(documents, labels))):
doc = doc.replace('\t', ' ').replace('\n', ' ').strip()
label = np.squeeze(np.asarray(label.todense()))
label = ' '.join([f'{x}' for x in label])
if doc:
foo.write(f'{label}\t{doc}\n')
written += 1
else:
foo.write(f'{label}\tempty document\n')
empty += 1
print(f'written = {written}')
print(f'empty = {empty}')
for dataset_name in ['reuters21578', 'ohsumed', 'jrcall', 'rcv1', 'wipo-sl-sc']: #'20newsgroups'
dataset = Dataset.load(dataset_name=dataset_name, pickle_path=f'../pickles/{dataset_name}.pickle').show()
os.makedirs(f'../leam/{dataset_name}', exist_ok=True)
write_data(dataset.devel_raw, dataset.devel_labelmatrix, f'../leam/{dataset_name}/train.csv')
#write_data(dataset.test_raw, dataset.test_labelmatrix, f'../leam/{dataset_name}/test.csv')
print('done')

View File

@ -0,0 +1,3 @@
def warn(*args, **kwargs): pass
import warnings
warnings.warn = warn

54
MultiLabel/util/early_stop.py Executable file
View File

@ -0,0 +1,54 @@
#adapted from https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
import torch
from time import time
from util.file import create_if_not_exist
class EarlyStopping:
def __init__(self, model, patience=20, verbose=True, checkpoint='./checkpoint.pt'):
# set patience to 0 or -1 to avoid stopping, but still keeping track of the best value and model parameters
self.patience_limit = patience
self.patience = patience
self.verbose = verbose
self.best_score = None
self.best_epoch = None
self.stop_time = None
self.checkpoint = checkpoint
self.model = model
self.STOP = False
def __call__(self, watch_score, epoch):
if self.STOP:
return #done
if self.best_score is None or watch_score >= self.best_score:
self.best_score = watch_score
self.best_epoch = epoch
self.stop_time = time()
if self.checkpoint:
self.print(f'[early-stop] improved, saving model in {self.checkpoint}')
torch.save(self.model, self.checkpoint)
else:
self.print(f'[early-stop] improved')
self.patience = self.patience_limit
else:
self.patience -= 1
if self.patience == 0:
self.STOP = True
self.print(f'[early-stop] patience exhausted')
else:
if self.patience>0: # if negative, then early-stop is ignored
self.print(f'[early-stop] patience={self.patience}')
def reinit_counter(self):
self.STOP = False
self.patience=self.patience_limit
def restore_checkpoint(self):
return torch.load(self.checkpoint)
def print(self, msg):
if self.verbose:
print(msg)
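if __name__ == '__main__':
    # Illustrative training-loop sketch (not part of the original module): the
    # model and the watched validation scores are synthetic placeholders.
    model = torch.nn.Linear(4, 2)
    early_stop = EarlyStopping(model, patience=3, checkpoint='./checkpoint.pt')
    for epoch, score in enumerate([0.50, 0.60, 0.58, 0.57, 0.55]):
        early_stop(score, epoch)
        if early_stop.STOP:
            break
    print(f'best score {early_stop.best_score} at epoch {early_stop.best_epoch}')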

38
MultiLabel/util/file.py Executable file
View File

@ -0,0 +1,38 @@
import urllib.request
from os import listdir, makedirs
from os.path import isdir, isfile, join, exists, dirname
def download_file(url, archive_filename):
def progress(blocknum, bs, size):
total_sz_mb = '%.2f MB' % (size / 1e6)
current_sz_mb = '%.2f MB' % ((blocknum * bs) / 1e6)
print('\rdownloaded %s / %s' % (current_sz_mb, total_sz_mb), end='')
print("Downloading %s" % url)
urllib.request.urlretrieve(url, filename=archive_filename, reporthook=progress)
print("")
def download_file_if_not_exists(url, archive_path):
if exists(archive_path): return
create_if_not_exist(dirname(archive_path))
download_file(url,archive_path)
def ls(dir, typecheck):
el = [f for f in listdir(dir) if typecheck(join(dir, f))]
el.sort()
return el
def list_dirs(dir):
return ls(dir, typecheck=isdir)
def list_files(dir):
return ls(dir, typecheck=isfile)
def create_if_not_exist(path):
if not exists(path): makedirs(path)
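Typical usage of the download helper, with an illustrative URL and target path:

download_file_if_not_exists('https://example.com/corpus.zip', '../archives/corpus.zip')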

86
MultiLabel/util/metrics.py Executable file
View File

@ -0,0 +1,86 @@
import numpy as np
from scipy.sparse import lil_matrix, issparse
from sklearn.metrics import f1_score, accuracy_score
"""
Scikit learn provides a full set of evaluation metrics, but they treat special cases differently.
I.e., when the number of true positives, false positives, and false negatives ammount to 0, all
affected metrices (precision, recall, and thus f1) output 0 in Scikit learn.
We adhere to the common practice of outputting 1 in this case since the classifier has correctly
classified all examples as negatives.
"""
def evaluation(y_true, y_pred, classification_type):
    if classification_type == 'multilabel':
        eval_function = multilabel_eval
    elif classification_type == 'singlelabel':
        eval_function = singlelabel_eval
    else:
        raise ValueError(f'unexpected classification_type: {classification_type}')
    Mf1, mf1, accuracy = eval_function(y_true, y_pred)
    return Mf1, mf1, accuracy
def multilabel_eval(y, y_):
tp = y.multiply(y_)
fn = lil_matrix(y.shape)
true_ones = y==1
fn[true_ones]=1-tp[true_ones]
fp = lil_matrix(y.shape)
pred_ones = y_==1
if pred_ones.nnz>0:
fp[pred_ones]=1-tp[pred_ones]
#macro-f1
tp_macro = np.asarray(tp.sum(axis=0), dtype=int).flatten()
fn_macro = np.asarray(fn.sum(axis=0), dtype=int).flatten()
fp_macro = np.asarray(fp.sum(axis=0), dtype=int).flatten()
pos_pred = tp_macro+fp_macro
pos_true = tp_macro+fn_macro
prec=np.zeros(shape=tp_macro.shape,dtype=float)
rec=np.zeros(shape=tp_macro.shape,dtype=float)
np.divide(tp_macro, pos_pred, out=prec, where=pos_pred>0)
np.divide(tp_macro, pos_true, out=rec, where=pos_true>0)
den=prec+rec
macrof1=np.zeros(shape=tp_macro.shape,dtype=float)
np.divide(np.multiply(prec,rec),den,out=macrof1,where=den>0)
macrof1 *=2
macrof1[(pos_pred==0)*(pos_true==0)]=1
macrof1 = np.mean(macrof1)
#micro-f1
tp_micro = tp_macro.sum()
fn_micro = fn_macro.sum()
fp_micro = fp_macro.sum()
pos_pred = tp_micro + fp_micro
pos_true = tp_micro + fn_micro
prec = (tp_micro / pos_pred) if pos_pred>0 else 0
rec = (tp_micro / pos_true) if pos_true>0 else 0
den = prec+rec
microf1 = 2*prec*rec/den if den>0 else 0
if pos_pred==pos_true==0:
microf1=1
#accuracy
ndecisions = np.multiply(*y.shape)
tn = ndecisions - (tp_micro+fn_micro+fp_micro)
acc = (tp_micro+tn)/ndecisions
return macrof1,microf1,acc
def singlelabel_eval(y, y_):
if issparse(y_): y_ = y_.toarray().flatten()
macrof1 = f1_score(y, y_, average='macro')
microf1 = f1_score(y, y_, average='micro')
acc = accuracy_score(y, y_)
return macrof1,microf1,acc
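A toy check of the special-case policy described above; this is a sketch assuming binary indicator matrices in sparse format, as used elsewhere in this module:

import numpy as np
from scipy.sparse import csr_matrix

y_true = csr_matrix(np.array([[1, 0], [0, 0]]))  # the second class has no positive examples
y_pred = csr_matrix(np.array([[1, 0], [0, 0]]))  # and none are predicted
Mf1, mf1, acc = evaluation(y_true, y_pred, 'multilabel')
# for the second class tp=fp=fn=0, so its F1 is taken to be 1 rather than 0;
# hence Mf1 == mf1 == acc == 1.0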

View File

@ -0,0 +1,65 @@
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
import numpy as np
from joblib import Parallel, delayed
from time import time
class MLSVC:
"""
Multi-Label Support Vector Machine, with individual optimizations per binary problem.
"""
def __init__(self, n_jobs=1, estimator=LinearSVC, *args, **kwargs):
self.n_jobs = n_jobs
self.args = args
self.kwargs = kwargs
        self.verbose = self.kwargs.get('verbose', False)
self.estimator = estimator
def fit(self, X, y, **grid_search_params):
tini = time()
assert len(y.shape)==2 and set(np.unique(y).tolist()) == {0,1}, 'data format is not multi-label'
nD,nC = y.shape
prevalence = np.sum(y, axis=0)
self.svms = np.array([self.estimator(*self.args, **self.kwargs) for _ in range(nC)])
        if grid_search_params and grid_search_params.get('param_grid'):
            self._print('grid_search activated with: {}'.format(grid_search_params))
            # grid search cannot be performed if the category prevalence is lower than the cv parameter;
            # in those cases we place a plain SVM instead of a GridSearchCV
cv = 5 if 'cv' not in grid_search_params else grid_search_params['cv']
assert isinstance(cv, int), 'cv must be an int (other policies are not supported yet)'
self.svms = [GridSearchCV(svm_i, refit=True, **grid_search_params) if prevalence[i]>=cv else svm_i
for i,svm_i in enumerate(self.svms)]
for i in np.argwhere(prevalence==0).flatten():
self.svms[i] = TrivialRejector()
        self.svms = Parallel(n_jobs=self.n_jobs)(
            delayed(svm_i.fit)(X, y[:, c]) for c, svm_i in enumerate(self.svms)
        )
self.training_time = time() - tini
def predict(self, X):
return np.vstack(list(map(lambda svmi: svmi.predict(X), self.svms))).T
def predict_proba(self, X):
        return np.vstack([svmi.predict_proba(X)[:, np.argwhere(svmi.classes_ == 1)[0, 0]] for svmi in self.svms]).T
def _print(self, msg):
if self.verbose>0:
print(msg)
def best_params(self):
return [svmi.best_params_ if isinstance(svmi, GridSearchCV) else None for svmi in self.svms]
class TrivialRejector:
def fit(self,*args,**kwargs): return self
def predict(self, X): return np.zeros(X.shape[0])
def predict_proba(self, X): return np.zeros(X.shape[0])
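A hedged usage sketch for MLSVC; the data are synthetic and, since LinearSVC is the default base estimator, predict_proba is unavailable unless a probabilistic estimator is passed instead:

from sklearn.datasets import make_multilabel_classification

X, Y = make_multilabel_classification(n_samples=200, n_classes=5, random_state=0)
mlsvc = MLSVC(n_jobs=-1)
mlsvc.fit(X, Y, param_grid={'C': [0.1, 1, 10]}, cv=3)  # one grid search per class
Y_hat = mlsvc.predict(X)                               # (200, 5) binary matrix
print(mlsvc.best_params())                             # best C per class; None where no search ran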

View File

@ -176,104 +176,6 @@ class LabelledCollection:
yield train, test
class MultilingualLabelledCollection:
def __init__(self, langs:List[str], labelledCollections:List[LabelledCollection]):
assert len(langs) == len(labelledCollections), 'length mismatch for langs and labelledCollection lists'
assert all(isinstance(lc, LabelledCollection) for lc in labelledCollections), 'unexpected type for labelledCollections'
assert all(labelledCollections[0].classes_ == lc_i.classes_ for lc_i in labelledCollections[1:]), \
'inconsistent classes found for some labelled collections'
self.llc = {l: lc for l, lc in zip(langs, labelledCollections)}
self.classes_=labelledCollections[0].classes_
@classmethod
def fromLangDict(cls, lang_labelledCollection:dict):
return MultilingualLabelledCollection(*list(zip(*list(lang_labelledCollection.items()))))
def langs(self):
return list(sorted(self.llc.keys()))
def __getitem__(self, lang)->LabelledCollection:
return self.llc[lang]
@classmethod
def load(cls, path: str, loader_func: callable):
return MultilingualLabelledCollection(*loader_func(path))
def __len__(self):
return sum(map(len, self.llc.values()))
def prevalence(self):
prev = np.asarray([lc.prevalence() * len(lc) for lc in self.llc.values()]).sum(axis=0)
return prev / prev.sum()
def language_prevalence(self):
lang_count = np.asarray([len(self.llc[l]) for l in self.langs()])
return lang_count / lang_count.sum()
def counts(self):
return np.asarray([lc.counts() for lc in self.llc.values()]).sum(axis=0)
@property
def n_classes(self):
return len(self.classes_)
@property
def binary(self):
return self.n_classes == 2
def __check_langs(self, l_dict:dict):
assert len(l_dict)==len(self.langs()), 'wrong number of languages'
assert all(l in l_dict for l in self.langs()), 'missing languages in l_sizes'
def __check_sizes(self, l_sizes: Union[int,dict]):
assert isinstance(l_sizes, int) or isinstance(l_sizes, dict), 'unexpected type for l_sizes'
if isinstance(l_sizes, int):
return {l:l_sizes for l in self.langs()}
self.__check_langs(l_sizes)
return l_sizes
def sampling_index(self, l_sizes: Union[int,dict], *prevs, shuffle=True):
l_sizes = self.__check_sizes(l_sizes)
return {l:lc.sampling_index(l_sizes[l], *prevs, shuffle=shuffle) for l,lc in self.llc.items()}
def uniform_sampling_index(self, l_sizes: Union[int, dict]):
l_sizes = self.__check_sizes(l_sizes)
return {l: lc.uniform_sampling_index(l_sizes[l]) for l,lc in self.llc.items()}
def uniform_sampling(self, l_sizes: Union[int, dict]):
l_sizes = self.__check_sizes(l_sizes)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.uniform_sampling(l_sizes[l]) for l,lc in self.llc.items()}
)
def sampling(self, l_sizes: Union[int, dict], *prevs, shuffle=True):
l_sizes = self.__check_sizes(l_sizes)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.sampling(l_sizes[l], *prevs, shuffle=shuffle) for l,lc in self.llc.items()}
)
def sampling_from_index(self, l_index:dict):
self.__check_langs(l_index)
return MultilingualLabelledCollection.fromLangDict(
{l: lc.sampling_from_index(l_index[l]) for l,lc in self.llc.items()}
)
def split_stratified(self, train_prop=0.6, random_state=None):
train, test = list(zip(*[self[l].split_stratified(train_prop, random_state) for l in self.langs()]))
return MultilingualLabelledCollection(self.langs(), train), MultilingualLabelledCollection(self.langs(), test)
def asLabelledCollection(self, return_langs=False):
lXy_list = [([l]*len(lc),*lc.Xy) for l, lc in self.llc.items()] # a list with (lang_i, Xi, yi)
ls,Xs,ys = list(zip(*lXy_list))
ls = np.concatenate(ls)
vertstack = vstack if issparse(Xs[0]) else np.vstack
Xs = vertstack(Xs)
ys = np.concatenate(ys)
lc = LabelledCollection(Xs, ys, classes_=self.classes_)
        return (lc, ls) if return_langs else lc
class Dataset:
def __init__(self, training: LabelledCollection, test: LabelledCollection, vocabulary: dict = None, name=''):

View File

@ -3,6 +3,13 @@ from scipy.sparse import dok_matrix
from tqdm import tqdm
def from_rcv2_lang_file(path, encoding='utf-8'):
lines = open(path, 'rt', encoding=encoding).readlines()
parts = [l.split('\t') for l in lines]
docs, cats = list(zip(*[(parts_i[1], parts_i[2]) for parts_i in parts]))
return docs, cats
def from_text(path, encoding='utf-8'):
"""
    Reads a labelled collection of documents.

View File

@ -105,7 +105,7 @@ def _predict_from_indexes(
estim_prevalence = quantification_func(sample.instances)
return true_prevalence, estim_prevalence
pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes
pbar = tqdm(indexes, desc='[sampling protocol] generating predictions') if verbose else indexes
results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)
true_prevalences, estim_prevalences = zip(*results)

View File

@ -37,6 +37,9 @@ class AggregativeQuantifier(BaseQuantifier):
def learner(self, value):
self.learner_ = value
def preclassify(self, instances):
return self.classify(instances)
def classify(self, instances):
return self.learner.predict(instances)
@ -74,6 +77,9 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
probabilities.
"""
def preclassify(self, instances):
return self.predict_proba(instances)
def posterior_probabilities(self, instances):
return self.learner.predict_proba(instances)
@ -316,6 +322,12 @@ class PACC(AggregativeProbabilisticQuantifier):
self.pcc = PCC(self.learner)
self.Pte_cond_estim_ = self.getPteCondEstim(classes, y, y_)
return self
@classmethod
def getPteCondEstim(cls, classes, y, y_):
# estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
# document that belongs to yj ends up being classified as belonging to yi
n_classes = len(classes)
@ -323,9 +335,7 @@ class PACC(AggregativeProbabilisticQuantifier):
for i, class_ in enumerate(classes):
confusion[i] = y_[y == class_].mean(axis=0)
self.Pte_cond_estim_ = confusion.T
return self
return confusion.T
def aggregate(self, classif_posteriors):
prevs_estim = self.pcc.aggregate(classif_posteriors)
@ -785,7 +795,7 @@ class OneVsAll(AggregativeQuantifier):
return self.binary_quantifier.get_params()
def _delayed_binary_classification(self, c, X):
return self.dict_binary_quantifiers[c].classify(X)
return self.dict_binary_quantifiers[c].preclassify(X)
def _delayed_binary_posteriors(self, c, X):
return self.dict_binary_quantifiers[c].posterior_probabilities(X)
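To make the getPteCondEstim comment above concrete, here is a standalone sketch of the estimate on hypothetical toy data, where y_ holds the posterior probabilities obtained on held-out documents:

import numpy as np

classes = [0, 1]
y = np.array([0, 0, 1, 1])                         # true labels
y_ = np.array([[.9, .1], [.7, .3],                 # posteriors for documents of class 0
               [.4, .6], [.2, .8]])                # posteriors for documents of class 1
confusion = np.empty((len(classes), len(classes)))
for i, class_ in enumerate(classes):
    confusion[i] = y_[y == class_].mean(axis=0)    # mean posterior given the true class
Pte_cond_estim = confusion.T                       # entry (i, j) estimates P(predicted=i | true=j)
print(Pte_cond_estim)                              # [[0.8 0.3]
                                                   #  [0.2 0.7]]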

View File

@ -27,7 +27,7 @@ class BaseQuantifier(metaclass=ABCMeta):
# based on class structure
@property
def binary(self):
return False
return len(self.classes_)==2
@property
def aggregative(self):

View File

@ -227,7 +227,7 @@ def _delayed_new_instance(args):
if val_split is not None:
if isinstance(val_split, float):
assert 0 < val_split < 1, 'val_split should be in (0,1)'
data, val_split = data.split_stratified(train_prop=1 - val_split)
data, val_split = data.train_test_split(train_prop=1 - val_split)
sample_index = data.sampling_index(sample_size, *prev)
sample = data.sampling_from_index(sample_index)

View File

@ -73,7 +73,7 @@ class QuaNetTrainer(BaseQuantifier):
if fit_learner:
classifier_data, unused_data = data.split_stratified(0.4)
train_data, valid_data = unused_data.split_stratified(0.66) # 0.66 split of 60% makes 40% and 20%
train_data, valid_data = unused_data.train_test_split(0.66) # 0.66 split of 60% makes 40% and 20%
self.learner.fit(*classifier_data.Xy)
else:
classifier_data = None
@ -87,8 +87,9 @@ class QuaNetTrainer(BaseQuantifier):
train_posteriors = self.learner.predict_proba(train_data.instances)
# turn instances' original representations into embeddings
valid_data.instances = self.learner.transform(valid_data.instances)
train_data.instances = self.learner.transform(train_data.instances)
valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_)
train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_)
self.quantifiers = {
'cc': CC(self.learner).fit(None, fit_learner=False),
@ -110,9 +111,9 @@ class QuaNetTrainer(BaseQuantifier):
nQ = len(self.quantifiers)
nC = data.n_classes
self.quanet = QuaNetModule(
doc_embedding_size=train_data.instances.shape[1],
doc_embedding_size=train_data_embed.instances.shape[1],
n_classes=data.n_classes,
stats_size=nQ*nC, #+ 2*nC*nC,
stats_size=nQ*nC,
order_by=0 if data.binary else None,
**self.quanet_params
).to(self.device)
@ -124,8 +125,8 @@ class QuaNetTrainer(BaseQuantifier):
checkpoint = self.checkpoint
for epoch_i in range(1, self.n_epochs):
self.epoch(train_data, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
self.epoch(valid_data, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
self.epoch(train_data_embed, train_posteriors, self.tr_iter, epoch_i, early_stop, train=True)
self.epoch(valid_data_embed, valid_posteriors, self.va_iter, epoch_i, early_stop, train=False)
early_stop(self.status['va-loss'], epoch_i)
if early_stop.IMPROVED:

View File

@ -97,7 +97,7 @@ class GridSearchQ(BaseQuantifier):
return training, validation
elif isinstance(validation, float):
assert 0. < validation < 1., 'validation proportion should be in (0,1)'
training, validation = training.split_stratified(train_prop=1 - validation)
training, validation = training.train_test_split(train_prop=1 - validation)
return training, validation
else:
raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the'