# QuaPy/Census/pairwise_2.py
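"""
Pairwise quantification experiment: for every ordered pair of distinct areas
(i, j), each quantifier is trained on area i and evaluated on area j, and the
resulting mean absolute errors are collected into one table per method.
"""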
import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ, SLD
from commons import *
from table import Table
from tqdm import tqdm
import quapy as qp
np.set_printoptions(linewidth=np.inf)


def classifier():
    # return LogisticRegressionCV(class_weight='balanced', Cs=10)
    return LogisticRegressionCV()
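

# Quantifiers under comparison: the MLPE baseline (predicts the training
# prevalence), the classify-and-count family (CC, PCC, ACC, PACC), SLD
# (QuaPy's EM-based method, an alias of EMQ), and the StatModel* approaches
# defined in the star-imported commons module.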
def quantifiers():
    cls = classifier()
    yield 'MLPE', MLPE()
    yield 'CC', CC(cls)
    yield 'PCC', PCC(cls)
    yield 'ACC', ACC(cls)
    yield 'PACC', PACC(cls)
    yield 'SLD', SLD(cls)
    yield 'SModelLR', StatModelLR()
    # prob_mean and prob_std are module-level globals defined below; this works
    # because the generator body only runs once those globals exist
    yield 'SModel', StatModel(mean=prob_mean, scale=prob_std)
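

# Load the survey data and preprocess the covariates (load_csv, Preprocessor,
# and get_dataset_by_area are helpers from the star-imported commons module).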
survey_y = './data/survey_y.csv'

Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)
prob_mean, prob_std = preprocessor.get_mean_std(column=-1)  # get the mean and std of the "prob" column
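
# One (area, X, y) triple per area; every area serves both as a training set
# and as a test set in the pairwise comparison.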
trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)
areas = [Ai for Ai, _, _ in trains]

tables = []
text_outputs = []

benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test
methods = [f'tr-{Ai}' for Ai in areas]  # areas on which a quantifier is trained
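
# Build one result table per quantifier, indexed by test area ("benchmarks")
# and training area ("methods"); each cell holds the MAE between the true and
# the estimated test prevalence.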
for q_name, q in quantifiers():
    table = Table(name=q_name, benchmarks=benchmarks, methods=methods, stat_test=None, color_mode='global')
    table.format.mean_prec = 4
    table.format.show_std = False
    table.format.sta = False
    table.format.remove_zero = True
    table.with_mean = True
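
    # train on each area in turn and evaluate on every other area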
    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_areas):
        tr = LabelledCollection(Xi, yi)
        q.fit(tr)
        len_tr = len(tr)
        for j, (Aj, Xj, yj) in enumerate(trains):
            if i == j:
                continue
            te = LabelledCollection(Xj, yj)
            qp.environ["SAMPLE_SIZE"] = len(te)  # QuaPy uses SAMPLE_SIZE to smooth ratio-based errors such as mrae
            pred_prev = q.quantify(te.X)
            true_prev = te.prevalence()
            # err = qp.error.mrae(true_prev, pred_prev)
            err = qp.error.mae(true_prev, pred_prev)
            table.add(benchmark=f'te-{Aj}', method=f'tr-{Ai}', v=err)
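
    # for each test area, record the best, worst, and average error obtained
    # across all training areas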
    for test in benchmarks:
        values = table.get_benchmark_values(test)
        table.add(benchmark=test, method='Best', v=min(values))
        table.add(benchmark=test, method='Worst', v=max(values))
        table.add(benchmark=test, method='AVE', v=np.mean(values))

    tables.append(table)
    text_outputs.append(f'{q_name} got mean {table.all_mean():.5f}, best mean {table.get_method_values("Best").mean():.5f}')
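
# render all tables into a single PDF report and dump the textual summaries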
Table.LatexPDF('./results/pairwise/doc.pdf', tables)
with open('./results/pairwise/output.txt', 'wt') as foo:
    foo.write('\n'.join(text_outputs))