# QuaPy/Census/main.py

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.svm import LinearSVC
from tqdm import tqdm
import quapy as qp
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as MLPE
from quapy.method.aggregative import EMQ, PACC, CC, PCC, MS2, MS, ACC
from quapy.data import LabelledCollection
from sklearn.preprocessing import StandardScaler
from commons import *
# Experiment setup: load the survey data, preprocess the covariates, split the
# data by area, and pick the classifier shared by the quantifiers below.

# Never wrap printed NumPy arrays onto multiple lines.
np.set_printoptions(linewidth=np.inf)

# Input CSV paths. Only the survey file is loaded; the census load is disabled.
cens_y = './data/cens_y.csv'
survey_y = './data/survey_y.csv'
# Ate, Xte = load_csv(cens_y)

# `load_csv` and `Preprocessor` are not defined in this file; presumably they
# come from the `commons` star import -- TODO confirm.
# NOTE(review): Atr/Xtr/ytr look like area ids, covariates and labels (judging
# by how they are passed to get_dataset_by_area) -- verify against commons.
Atr, Xtr, ytr = load_csv(survey_y, use_yhat=True)

# The preprocessor is fitted once on the full survey covariates, before the
# per-area split.
preprocessor = Preprocessor()
Xtr = preprocessor.fit_transform(Xtr)
# Xtr_proc = preprocessor.fit_transform(Xtr)
# big_train = LabelledCollection(Xtr_proc, ytr)
# q.fit(big_train)

# `trains` is later iterated as 3-tuples (unpacked as Ai, Xi, yi) and its
# length is taken as the number of areas.
trains = get_dataset_by_area(Atr, Xtr, ytr)
# tests = get_dataset_by_area(Ate, Xte)
n_area = len(trains)

# Base classifier handed to every aggregative quantifier; the commented lines
# are alternative choices.
# cls = LinearSVC()
cls = LogisticRegression()
# cls = LogisticRegressionCV(class_weight='balanced', Cs=10)

# Single-quantifier selection (alternatives commented out).
# NOTE(review): this binding is overwritten by the `for q in [...]` loop that
# follows, so the EMQ instance created here is never used.
# q = CC(cls)
# q = PCC(cls)
# q = PACC(cls)
q = EMQ(cls)
# q = MS(cls)
#q = MaximumLikelihoodPrevalenceEstimation()
# Cross-area evaluation. For each quantification method, fit on one area at a
# time and score its prevalence estimates on every other area.
for q in [CC(cls), PCC(cls), ACC(cls), PACC(cls), EMQ(cls), MLPE()]:
    # results[i, j] holds the MAE obtained when fitting on area i and testing
    # on area j. The diagonal (i == j) is never evaluated and stays at 0.
    results = np.zeros(shape=(n_area, n_area))

    for i, (Ai, Xi, yi) in tqdm(enumerate(trains), total=n_area):
        # Xi = preprocessor.fit_transform(Xi)
        tr = LabelledCollection(Xi, yi)
        q.fit(tr)
        len_tr = len(tr)
        # len_tr = len(big_train)

        for j, (Aj, Xj, yj) in enumerate(trains):
            if i == j:
                # Never test on the area the quantifier was just fitted on.
                continue

            # Xj = preprocessor.transform(Xj)
            te = LabelledCollection(Xj, yj)
            pred_prev = q.quantify(te.X)
            true_prev = te.prevalence()

            # Alternative error metric, kept for reference:
            # qp.environ["SAMPLE_SIZE"] = len(te)
            # err = qp.error.mrae(true_prev, pred_prev)
            err = qp.error.mae(true_prev, pred_prev)

            # Index 1 of each prevalence vector is reported alongside the error.
            print(f'{i=} {j=} [#train={len_tr}] true_prev={true_prev[1]:.3f} pred_prev={pred_prev[1]:.3f} {err=:.4f}')
            results[i, j] = err

    q_name = q.__class__.__name__
    # print(results)

    # Only the off-diagonal cells were measured; averaging the full matrix
    # would count n_area structural zeros and understate the mean error.
    evaluated = ~np.eye(n_area, dtype=bool)
    print(f'{q_name} mean results = {results[evaluated].mean():.4f}')

    # For every test area (column) take the error of its best training area,
    # then average over test areas. The unevaluated diagonal is masked with
    # +inf so it can never win the minimum, whatever the range of the metric.
    best_per_test_area = np.where(evaluated, results, np.inf).min(axis=0)
    print(best_per_test_area.mean())