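"""Cross-area transferability of classifiers.

For each classifier, this script trains on the survey data of one area and
measures plain accuracy on every other area; results are collected in one
Table per classifier and rendered to a LaTeX PDF plus a text summary.
"""
import os
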
import numpy as np
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression
from sklearn.svm import SVC, LinearSVC
from commons import *
from table import Table
from tqdm import tqdm
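
# NOTE: commons and table are project-local modules; commons is assumed to
# provide load_csv, Preprocessor, and get_dataset_by_area used below.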
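# print full numpy arrays on a single line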
np.set_printoptions(linewidth=np.inf)


def classifiers():
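    """Yield (name, estimator) pairs for every classifier to benchmark."""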
    yield 'LR-opt', LogisticRegressionCV(Cs=10)
    yield 'LR-opt-bal', LogisticRegressionCV(class_weight='balanced', Cs=10)
    yield 'LR-def', LogisticRegression()
    yield 'SVM-linear', LinearSVC()
    yield 'SVM-rbf', SVC(kernel='rbf')


survey_y = './data/survey_y.csv'
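# Atr: per-row area labels; Xtr: covariates; ytr: class labels
# (use_yhat is an option of the project-local load_csv)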
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)
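# fit the project's Preprocessor on the training covariates (its exact
# transformations are defined in commons)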
preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)
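# group the pooled data into per-area (area, X, y) triples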
trains = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(trains)
areas = [Ai for Ai, _, _ in trains]
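# accumulators: one Table per classifier, plus a one-line text summary each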
tables = []
text_outputs = []
benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test sets
methods = [f'tr-{Ai}' for Ai in areas]  # areas on which a classifier is trained
for cls_name, c in classifiers():
    table = Table(name=cls_name, benchmarks=benchmarks, methods=methods,
                  stat_test=None, color_mode='local', lower_is_better=False)
    table.format.mean_prec = 4
    table.format.show_std = False
    table.format.stat_test = False
    table.format.remove_zero = True

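    # train on area i and measure accuracy on every other area j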
    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_areas):
        c.fit(Xi, yi)
        for j, (Aj, Xj, yj) in enumerate(trains):
            if i == j:
                continue
            pred_labels = c.predict(Xj)
            true_labels = yj
            acc = (pred_labels == true_labels).mean()
            table.add(benchmark=f'te-{Aj}', method=f'tr-{Ai}', v=acc)

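    # add per-test-area summary rows (best, worst, and average accuracy)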
    for test in benchmarks:
        values = table.get_benchmark_values(test)
        table.add(benchmark=test, method='Best', v=max(values))
        table.add(benchmark=test, method='Worst', v=min(values))
        table.add(benchmark=test, method='AVE', v=np.mean(values))

    tables.append(table)
    text_outputs.append(f'{cls_name} got mean {table.all_mean():.5f}')
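os.makedirs('./results/classifier', exist_ok=True)  # ensure the output directory exists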
Table.LatexPDF('./results/classifier/doc.pdf', tables)
with open('./results/classifier/output.txt', 'wt') as foo:
    foo.write('\n'.join(text_outputs))