"""Leave-one-area-out evaluation of aggregated per-area quantifiers.

For every quantification method, one copy of the quantifier is fit on each
area of the survey data. Each area is then used in turn as the test set: the
quantifiers fit on all *other* areas are combined through an
``AggregationRule`` (``'median'`` or ``'mean'``) and the mean absolute error
of the predicted prevalence is recorded. One table per aggregation rule is
produced and all tables are rendered to a single PDF.
"""
import numpy as np
from sklearn.linear_model import LogisticRegressionCV
from Census.methods import AreaQuantifier, AggregationRule
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
# NOTE: the position of this star import is deliberately kept as in the
# original file: names imported *after* it take precedence over whatever
# `commons` exports, so reordering could silently change name resolution.
from commons import *
from table import Table
from tqdm import tqdm
import quapy as qp
from copy import deepcopy

np.set_printoptions(linewidth=np.inf)


def classifier():
    """Return a new, unfitted ``LogisticRegressionCV`` with default settings."""
    return LogisticRegressionCV()


def quantifiers():
    """Yield ``(name, quantifier)`` pairs for every method under evaluation.

    The classifier-based quantifiers all wrap the same classifier instance,
    so callers should copy a quantifier before fitting it (the script below
    uses ``deepcopy``).

    The ``'SModel'`` entry reads the module-level ``prob_mean`` and
    ``prob_std``, which are assigned further down in this file. Because this
    is a generator, those names are only looked up while iterating, so the
    generator must not be consumed before they are defined.
    """
    cls = classifier()
    yield 'MLPE', MLPE()
    yield 'CC', CC(cls)
    yield 'PCC', PCC(cls)
    yield 'ACC', ACC(cls)
    yield 'PACC', PACC(cls)
    yield 'SLD', EMQ(cls)
    yield 'SModelLR', StatModelLR()
    yield 'SModel', StatModel(mean=prob_mean, scale=prob_std)


survey_y = './data/survey_y.csv'
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)
# mean and std of the last column (the "prob" column)
prob_mean, prob_std = preprocessor.get_mean_std(column=-1)

data = get_dataset_by_area(Atr, Xtr, ytr)
n_areas = len(data)

areas = [Ai for Ai, _, _ in data]
q_names = [q_name for q_name, _ in quantifiers()]

aggregations = ['median', 'mean']

# Never populated: the reporting code that used it is disabled (see the
# commented-out lines at the end of the file).
text_outputs = []

benchmarks = [f'te-{Ai}' for Ai in areas]  # areas used as test

# One results table per aggregation rule. Columns are named
# '<quantifier>-<aggregation>', e.g. 'PACC-median'.
tables_by_aggr = {}
for aggr in aggregations:
    methods = [f'{q_name}-{aggr}' for q_name in q_names]
    table = Table(
        name=f'all{aggr}',
        benchmarks=benchmarks,
        methods=methods,
        stat_test=None,
        color_mode='local',
    )
    table.format.mean_prec = 4
    table.format.show_std = False
    # NOTE(review): attribute name `sta` kept exactly as in the original;
    # confirm it is an option that Table.format actually reads.
    table.format.sta = False
    table.format.remove_zero = True
    tables_by_aggr[aggr] = table

for q_name, q in quantifiers():
    # Fit one copy of the quantifier per area. The fit does not depend on the
    # aggregation rule, so it is done once and the fitted models are reused
    # for every rule instead of being retrained for each of them.
    pretrained_area_q = []
    for Ai, Xi, yi in tqdm(data, total=n_areas):
        q_i = deepcopy(q)
        q_i.fit(LabelledCollection(Xi, yi))
        pretrained_area_q.append(AreaQuantifier(Ai, q_i))

    # Leave-one-area-out evaluation: area Ai is the test set and only the
    # quantifiers fit on the remaining areas take part in the rule.
    for Ai, Xi, yi in tqdm(data, total=n_areas):
        te = LabelledCollection(Xi, yi)
        qp.environ["SAMPLE_SIZE"] = len(te)
        true_prev = te.prevalence()

        for aggr, table in tables_by_aggr.items():
            # compose members of the rule (quantifiers are already fit);
            # a new list is built for every rule, as in the original code
            area_quantifiers = [qA_j for qA_j in pretrained_area_q if qA_j.area != Ai]
            rule = AggregationRule(area_quantifiers, aggr=aggr)

            pred_prev = rule.predict(Ai, te.X)
            err = qp.error.mae(true_prev, pred_prev)

            method_name = f'{q_name}-{aggr}'
            table.add(benchmark=f'te-{Ai}', method=method_name, v=err)

    # text_outputs.append(f'{q_name} got mean {table.all_mean():.5f}, best mean {table.get_method_values("Best").mean():.5f}')

# Same order as `aggregations`: dicts preserve insertion order.
tables = list(tables_by_aggr.values())

Table.LatexPDF('./results/allaggregation/doc.pdf', tables)

# with open(f'./results/classifier/output.txt', 'tw') as foo:
#     foo.write('\n'.join(text_outputs))