# QuaPy/Census/classification_accuracy_1.py

import numpy as np
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
from sklearn.svm import SVC, LinearSVC

from commons import *
from table import Table
from tqdm import tqdm


# Remove the per-line character limit used when printing NumPy arrays, so that
# printed arrays are never wrapped across several lines.
np.set_printoptions(linewidth=np.inf)

def classifiers():
    """Yield ``(name, estimator)`` pairs for the classifiers to be compared.

    Estimators are instantiated lazily, one at a time, as the generator is
    consumed. Every call to this function produces fresh, unfitted instances.

    Yields:
        tuple: a short display name and the corresponding scikit-learn
        estimator, in this order: 'LR-opt', 'LR-opt-bal', 'LR-def',
        'SVM-linear', 'SVM-rbf'.
    """
    # (display name, estimator class, constructor keyword arguments)
    specs = (
        ('LR-opt', LogisticRegressionCV, {'Cs': 10}),
        ('LR-opt-bal', LogisticRegressionCV, {'class_weight': 'balanced', 'Cs': 10}),
        ('LR-def', LogisticRegression, {}),
        ('SVM-linear', LinearSVC, {}),
        ('SVM-rbf', SVC, {'kernel': 'rbf'}),
    )
    for label, estimator_cls, params in specs:
        yield label, estimator_cls(**params)


# --- Data loading and per-area split ----------------------------------------

# CSV file with the survey data used in this experiment.
survey_y = './data/survey_y.csv'

# load_csv returns three objects; presumably the area identifiers (Atr), the
# covariates (Xtr) and the labels (ytr) -- confirm against commons.load_csv.
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

# Fit the preprocessor on the covariates and keep their transformed version.
preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)

# One (area, X, y) triple per area.
trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)

areas = [area_id for area_id, _X, _y in trains]

# Every area plays both roles: a classifier is trained on it ('tr-<area>')
# and classifiers trained elsewhere are tested on it ('te-<area>').
methods     = [f'tr-{area_id}' for area_id in areas]
benchmarks  = [f'te-{area_id}' for area_id in areas]

# Accumulators filled in by the evaluation loop, one entry per classifier.
text_outputs = []
tables = []

# Cross-area evaluation: each classifier is trained on every area in turn and
# its accuracy is measured on each of the other areas.
for cls_name, c in classifiers():

    # One table per classifier; values are accuracies, so higher is better.
    table = Table(
        name=cls_name,
        benchmarks=benchmarks,
        methods=methods,
        stat_test=None,
        color_mode='local',
        lower_is_better=False,
    )
    table.format.mean_prec = 4
    table.format.show_std = False
    table.format.stat_test = False
    table.format.remove_zero = True

    for src_idx, (src_area, X_src, y_src) in tqdm(enumerate(trains), total=n_areas):
        # The same estimator object is refitted for every training area.
        c.fit(X_src, y_src)
        for dst_idx, (dst_area, X_dst, y_dst) in enumerate(trains):
            # Never test on the area the classifier has just been trained on.
            if dst_idx != src_idx:
                accuracy = (c.predict(X_dst) == y_dst).mean()
                table.add(benchmark=f'te-{dst_area}', method=f'tr-{src_area}', v=accuracy)

    # Summary entries per test area. The values are read once, before any of
    # the summary entries for that test area is added.
    summaries = (('Best', max), ('Worst', min), ('AVE', np.mean))
    for test_name in benchmarks:
        scores = table.get_benchmark_values(test_name)
        for label, aggregate in summaries:
            table.add(benchmark=test_name, method=label, v=aggregate(scores))

    tables.append(table)
    text_outputs.append(f'{cls_name} got mean {table.all_mean():.5f}')


# Persist the results: one PDF document containing every per-classifier table,
# plus a plain-text file with one summary line per classifier.
import os  # local import: only needed here, to prepare the output folder

results_dir = './results/classifier'

# open() does not create missing parent directories. Create the folder up
# front so that a fresh checkout does not fail with FileNotFoundError after
# all classifiers have already been trained and evaluated.
os.makedirs(results_dir, exist_ok=True)

Table.LatexPDF(f'{results_dir}/doc.pdf', tables)
with open(f'{results_dir}/output.txt', 'tw') as foo:
    foo.write('\n'.join(text_outputs))