# QuaPy/Census/classification_accuracy_1.py

import os

import numpy as np
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
from sklearn.svm import SVC, LinearSVC

from commons import *
from table import Table
from tqdm import tqdm


np.set_printoptions(linewidth=np.inf)

def classifiers():
    """Yield ``(name, estimator)`` pairs for every classifier under comparison.

    Each estimator is instantiated only when its pair is requested, so
    consuming the generator partially never builds the remaining models.
    The name is the label used for the classifier's results table and for
    its line in the text summary.
    """
    # name -> zero-argument factory; the order here is the order of evaluation
    lazy_estimators = (
        ('LR-opt', lambda: LogisticRegressionCV(Cs=10)),
        ('LR-opt-bal', lambda: LogisticRegressionCV(class_weight='balanced', Cs=10)),
        ('LR-def', lambda: LogisticRegression()),
        ('SVM-linear', lambda: LinearSVC()),
        ('SVM-rbf', lambda: SVC(kernel='rbf')),
    )
    for label, build in lazy_estimators:
        yield label, build()


# Path of the CSV file the whole experiment is run on
survey_y = './data/survey_y.csv'

# Filled in by the experiment loop: one results table and one summary line per classifier
tables, text_outputs = [], []

# NOTE(review): presumably area identifiers, covariates and labels -- confirm against commons.load_csv
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

# The preprocessor is fit once, on the covariates of all areas together
preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)

# One (area, X, y) triple per area
trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)

areas = [area for (area, _X, _y) in trains]

# Table coordinates: every area acts both as a test set (benchmark) ...
benchmarks = [f'te-{area}' for area in areas]
# ... and as a training set (method) on which a classifier is fit
methods = [f'tr-{area}' for area in areas]

# For every classifier: fit it on each area in turn, measure its accuracy on
# every *other* area, and collect the results in one table per classifier.
for cls_name, clf in classifiers():

    # Benchmarks are the test areas, methods are the training areas;
    # cells hold accuracies, hence lower_is_better=False.
    table = Table(
        name=cls_name,
        benchmarks=benchmarks,
        methods=methods,
        stat_test=None,
        color_mode='local',
        lower_is_better=False,
    )
    for option, setting in (
        ('mean_prec', 4),
        ('show_std', False),
        ('stat_test', False),
        ('remove_zero', True),
    ):
        setattr(table.format, option, setting)

    for train_pos, (train_area, X_train, y_train) in tqdm(enumerate(trains), total=n_areas):
        # the same estimator instance is refit for every training area
        clf.fit(X_train, y_train)
        for test_pos, (test_area, X_test, y_test) in enumerate(trains):
            if train_pos != test_pos:  # never evaluate on the area used for training
                hits = clf.predict(X_test) == y_test
                table.add(benchmark=f'te-{test_area}', method=f'tr-{train_area}', v=hits.mean())

    # Per-test-area summary columns. Each statistic is computed right before
    # it is stored, in the order Best, Worst, AVE.
    for test in benchmarks:
        values = table.get_benchmark_values(test)
        for label, reduce_fn in (('Best', max), ('Worst', min), ('AVE', np.mean)):
            table.add(benchmark=test, method=label, v=reduce_fn(values))

    tables.append(table)

    # NOTE(review): all_mean() is called after the Best/Worst/AVE cells were
    # added -- confirm whether Table includes them in the overall mean.
    text_outputs.append(f'{cls_name} got mean {table.all_mean():.5f}')


# Persist the results: a PDF with one table per classifier, plus a plain-text
# file with one "<classifier> got mean <value>" line per classifier.
results_dir = './results/classifier'

# Make sure the destination exists before anything is written to it, so a
# missing folder cannot fail the script after the experiments have finished.
os.makedirs(results_dir, exist_ok=True)

Table.LatexPDF(f'{results_dir}/doc.pdf', tables)

# Explicit encoding keeps the file identical across platforms/locales.
with open(f'{results_dir}/output.txt', 'wt', encoding='utf-8') as summary_file:
    summary_file.write('\n'.join(text_outputs))
some experiments run, not much to say though 2024-03-27 16:43:28 +01:00			`import numpy as np`
showing experiments 2024-04-03 16:24:55 +02:00			`from sklearn.linear_model import LogisticRegressionCV, LogisticRegression`
			`from sklearn.svm import SVC, LinearSVC`
some experiments run, not much to say though 2024-03-27 16:43:28 +01:00
			`from commons import *`
			`from table import Table`
showing experiments 2024-04-03 16:24:55 +02:00			`from tqdm import tqdm`
some experiments run, not much to say though 2024-03-27 16:43:28 +01:00

			`np.set_printoptions(linewidth=np.inf)`

			`def classifiers():`
showing experiments 2024-04-03 16:24:55 +02:00			`yield 'LR-opt', LogisticRegressionCV(Cs=10)`
			`yield 'LR-opt-bal', LogisticRegressionCV(class_weight='balanced', Cs=10)`
			`yield 'LR-def', LogisticRegression()`
some experiments run, not much to say though 2024-03-27 16:43:28 +01:00			`yield 'SVM-linear', LinearSVC()`
			`yield 'SVM-rbf', SVC(kernel='rbf')`


			`survey_y = './data/survey_y.csv'`

			`Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)`

			`preprocessor = Preprocessor()`
			`Xtr = preprocessor.fit_transform(Xtr)`

			`trains = get_dataset_by_area(Atr, Xtr, ytr)`
			`n_areas = len(trains)`

			`areas = [Ai for Ai, _, _ in trains]`

			`tables = []`
			`text_outputs = []`

			`benchmarks = [f'te-{Ai}' for Ai in areas] # areas used as test`
			`methods = [f'tr-{Ai}' for Ai in areas] # areas on which a quantifier is trained`

			`for cls_name, c in classifiers():`

			`table = Table(name=cls_name, benchmarks=benchmarks, methods=methods, stat_test=None, color_mode='local', lower_is_better=False)`
			`table.format.mean_prec = 4`
			`table.format.show_std = False`
showing experiments 2024-04-03 16:24:55 +02:00			`table.format.stat_test = False`
some experiments run, not much to say though 2024-03-27 16:43:28 +01:00			`table.format.remove_zero = True`

			`for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_areas):`
			`c.fit(Xi, yi)`
			`for j, (Aj, Xj, yj) in enumerate(trains):`
			`if i==j: continue`
			`pred_labels = c.predict(Xj)`
			`true_labels = yj`
			`acc = (pred_labels==true_labels).mean()`
			`table.add(benchmark=f'te-{Aj}', method=f'tr-{Ai}', v=acc)`

			`for test in benchmarks:`
			`values = table.get_benchmark_values(test)`
			`table.add(benchmark=test, method='Best', v=max(values))`
			`table.add(benchmark=test, method='Worst', v=min(values))`
			`table.add(benchmark=test, method='AVE', v=np.mean(values))`

			`tables.append(table)`

			`text_outputs.append(f'{cls_name} got mean {table.all_mean():.5f}')`


			`Table.LatexPDF(f'./results/classifier/doc.pdf', tables)`
			`with open(f'./results/classifier/output.txt', 'tw') as foo:`
			`foo.write('\n'.join(text_outputs))`