Compare commits

...

8 Commits

13 changed files with 1416 additions and 6 deletions

View File

@@ -0,0 +1,212 @@
import numpy as np
import matplotlib.pyplot as plt
import sklearn.preprocessing
from matplotlib import cm
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import normalize
import quapy as qp
import quapy.functional as F
from quapy.data import LabelledCollection
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ
import os
from scipy.stats import ttest_rel
"""
The idea of this method is to make a first guess of the test class distribution (e.g., with PACC) and then
train a method without adjustment (e.g., PCC), setting the class_weight param so that it best compensates
for the positive and negative contributions w.r.t. the guessed distribution. The method can be iterative, though I
have not seen any major improvement (if any at all) from running more than 1 iteration.
This file is the proof of concept with artificial data and nice plots. The quantifier itself is implemented in
class_weight_model.py.
So far, it works for artificial datasets and, on UCI (without model selection for now), it works better than PACC.
For reviews, though, it does not improve over PACC.
"""
x_min, x_max = 0, 11
y_min, y_max = 0, x_max
center0 = (2*x_max/5,2*x_max/5)
center1 = (3*x_max/5,3*x_max/5)
X, Y = make_blobs(n_samples=[100000, 100000], n_features=2, centers=[center0,center1])
data = LabelledCollection(X, Y)
train_pool, test_pool = data.split_stratified(train_prop=0.5)
def plot(fignum, title, savepath=None):
clf = q.learner
# get the separating hyperplane
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(0, x_max)
yy = a * xx - (clf.intercept_[0]) / w[1]
wref = reference_hyperplane.coef_[0]
aref = -wref[0] / wref[1]
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
# Z = clf.decision_function(xy).reshape(XX.shape)
# Z2 = reference_hyperplane.decision_function(xy).reshape(XX.shape)
# plot the line and the points
plt.figure(fignum + 1, figsize=(10, 10))
plt.clf()
plt.plot(xx, yy, "k-")
Xte, yte = test.Xy
# plt.scatter(Xte[:, 0], Xte[:, 1], c=test.labels, zorder=10, cmap=cm.get_cmap("RdBu"), alpha=0.4)
cmap=cm.get_cmap("RdBu")
plt.scatter(Xte[yte==0][:, 0], Xte[yte==0][:, 1], color=cmap(0), zorder=10, alpha=0.4, label='-')
plt.scatter(Xte[yte==1][:, 0], Xte[yte==1][:, 1], color=cmap(cmap.N-1), zorder=10, alpha=0.4, label='+')
plt.axis("tight")
# Put the result into a contour plot
# plt.contourf(XX, YY, Z, cmap=cm.get_cmap("RdBu"), alpha=0.6, levels=50, linestyles=None)
plt.plot(xx, a * xx - (clf.intercept_[0]) / w[1], 'k-', label='modified')
plt.plot(xx, aref * xx - (reference_hyperplane.intercept_[0]) / wref[1], 'k--', label='original')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.title(title)
plt.legend()
if savepath:
plt.savefig(savepath)
def mock_y(prev):
n=10000
nneg = int(n * prev[0])
npos = int(n * prev[1])
mock = np.asarray([0]*nneg + [1]*npos, dtype=int)
return mock
def get_class_weight(prevalence):
# class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
# return {0: class_weight[1], 1: class_weight[0]}
# weights = prevalence/prevalence.min()
weights = prevalence / train.prevalence()
normfactor = weights.min()
if normfactor <= 0:
normfactor = 1E-3
weights /= normfactor
return {0:weights[0], 1:weights[1]}
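# e.g., with training prevalence [0.5, 0.5] and a guessed test prevalence of [0.2, 0.8], weights = [0.4, 1.6],
# which after dividing by the minimum (0.4) gives class_weight = {0: 1.0, 1: 4.0}: errors on the class believed
# to be under-represented in training are penalized 4 times more heavily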
def train_eval(class_weight, test):
q = Method(LogisticRegression(class_weight=class_weight))
q.fit(train)
prev_estim = q.quantify(test.instances)
true_prev = test.prevalence()
ae = qp.error.ae(true_prev, prev_estim)
return q, prev_estim, ae
probabilistic = True
Prompter = PACC # the method creating the very first guess
Baseline = PACC if probabilistic else ACC
bname = Baseline.__name__
Method = PCC if probabilistic else CC
mname = Method.__name__
plotdir=f'./plots/{mname}_vs_{bname}'
os.makedirs(plotdir, exist_ok=True)
test_prevs = np.linspace(0,1,20)
train_prevs = np.linspace(0.05,0.95,20)
fignum = 0
wins, total = 0, 0
merrors = []
berrors = []
for ptr in train_prevs:
train = train_pool.sampling(10000, ptr)
reference_hyperplane = LogisticRegression().fit(*train.Xy)
baseline = Baseline(LogisticRegression()).fit(train)
if Baseline != Prompter:
prompter = Prompter(LogisticRegression()).fit(train)
else:
prompter = baseline
for pte in test_prevs:
test = test_pool.sampling(10000, pte)
# some baseline results
prev_estim_acc = baseline.quantify(test.instances)
ae_baseline = qp.error.ae(test.prevalence(), prev_estim_acc)
berrors.append(ae_baseline)
# guessed_prevalence = train.prevalence()
guessed_prevalence = prompter.quantify(test.instances)
niter=10
last_prev = None
for i in range(niter):
class_weight = get_class_weight(guessed_prevalence)
q, prev_estim, ae = train_eval(class_weight, test)
stop = (i == niter-1) or (last_prev is not None and qp.error.ae(prev_estim, last_prev) < 0.001)
if stop:
merrors.append(ae)
win = ae < ae_baseline
if win: wins+=1
print(f'{i}: tr_prev={F.strprev(train.prevalence())} te_prev={F.strprev(test.prevalence())}, {mname}+ estim_prev={F.strprev(prev_estim)} AE={ae:.5f} '
f'using class_weight [{class_weight[0]:.3f}, {class_weight[1]:.3f}] '
f'({bname} prev={F.strprev(prev_estim_acc)} AE={ae_baseline:.5f}) '
f'{"WIN" if win else "LOSE"}')
break
else:
last_prev = prev_estim
# title='$\hat{{p}}^{{{}}}={:.3f}$, $p={:.3f}$, $\hat{{p}}={:.3f}$, AE$_{{{}}}={:.3f}$, AE$_{{{}}}={:.3f}$'.format(
# i, guessed_prevalence[0], pte, prev_estim[0], mname, ae, bname, ae_baseline
# )
# savepath=os.path.join(plotdir, f'tr_{ptr}_te{pte}_{i}.png')
# plot(fignum, title, savepath)
fignum+=1
guessed_prevalence = prev_estim
total += 1
merrors = np.asarray(merrors)
berrors = np.asarray(berrors)
mean_merrors = merrors.mean()
mean_berrors = berrors.mean()
print(f'WINS={wins}/{total}={100*wins/total:.2f}%')
_,p_val = ttest_rel(merrors,berrors)
print(f'{mname}-ave={mean_merrors:.5f} {bname}-ave={mean_berrors:.5f}')
print(f'ttest p-value={p_val:5f} significant={p_val<0.05}')

View File

@@ -0,0 +1,87 @@
from sklearn.base import BaseEstimator
import numpy as np
import quapy as qp
import quapy.functional as F
from data import LabelledCollection
from method.aggregative import ACC
from method.base import BaseQuantifier
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
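# Proof of concept: build one quantifier per feature, each consisting of a decision stump (predict the positive
# class iff the tf-idf value of that feature is > 0) adjusted with ACC. The stumps are ranked by their mean AE
# across artificial dev samples, the k best ones are kept, and the mean and median of their test estimates are
# reported as the final prevalence estimate.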
data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10)
class DecisionStump(BaseEstimator):
def __init__(self, feat_id):
self.feat_id = feat_id
self.classes_ = np.asarray([0,1], dtype=int)
def fit(self, X, y):
return self
def predict(self, X):
return (X[:,self.feat_id].toarray().flatten()>0).astype(int)
class QuantificationStump(BaseQuantifier):
def __init__(self, feat_id):
self.feat_id = feat_id
def fit(self, data: LabelledCollection):
self.qs = ACC(DecisionStump(self.feat_id))
self.qs.fit(data, fit_learner=False, val_split=data)
self.classes = data.classes_
return self
def quantify(self, instances):
return self.qs.quantify(instances)
def set_params(self, **parameters):
raise NotImplementedError()
def get_params(self, deep=True):
raise NotImplementedError()
@property
def classes_(self):
return self.classes
train, dev = data.training.split_stratified()
test = data.test.sampling(1000, 0.3, 0.7)
print(f'test prevalence = {F.strprev(test.prevalence())}')
nF = train.instances.shape[1]
qs_scores = []
qs = np.asarray([QuantificationStump(i).fit(train) for i in tqdm(range(nF))])
scores = np.zeros(shape=(nF, 11*5))
for j, dev_sample in tqdm(enumerate(dev.artificial_sampling_generator(500, n_prevalences=11, repeats=5)), total=11*5):
sample_prev = dev_sample.prevalence()
for i, qs_i in enumerate(qs):
estim_prev = qs_i.quantify(dev_sample.instances)  # quantify the sampled dev subset, whose prevalence is compared below
error = qp.error.ae(sample_prev, estim_prev)
scores[i,j] = error
k=250
scores = scores.mean(axis=1)
order = np.argsort(scores)
qs = qs[order][:k]
prevs = np.asarray([qs_i.quantify(test.instances)[1] for qs_i in tqdm(qs)])
print(f'test estimation mean {prevs.mean():.3f}, median = {np.median(prevs)}')
# sns.histplot(data=prevs, binwidth=3)
# An "interface" to matplotlib.axes.Axes.hist() method
# n, bins, patches = plt.hist(x=prevs, bins='auto', alpha=0.7)
# plt.grid(axis='y', alpha=0.75)
# plt.xlabel('Value')
# plt.ylabel('Frequency')
# plt.title('My Very Own Histogram')
# maxfreq = n.max()
# Set a clean upper y-axis limit.
# plt.ylim(ymax=np.ceil(maxfreq / 10) * 10 if maxfreq % 10 else maxfreq + 10)
# plt.show()

View File

@@ -0,0 +1,94 @@
from sklearn import clone
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
import numpy as np
from sklearn.model_selection import GridSearchCV
import quapy as qp
from data import LabelledCollection
from method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC
"""
Possible extensions:
- add CC and ClassWeightCC
- understand how to optimize the hyper-parameters of the final PCC quantifier. It is not trivial, since once
class_weight has been set, the C parameter plays a secondary role. The reason is that I strongly doubt that
the cross-validation takes into account the fact that one class might be more important than the other,
and so the best C for quantifying, conditioned on this class prevalence, has nothing to do with the
best C for classifying the current data... unless I define an evaluation metric that weights each class,
but this is very tricky (it amounts to implementing the "adjustment" inside the evaluation metric...)
- it might be worth investigating more deeply the role of CV, and of val_split, in ACC/PACC. Is it something that
consistently delivers improved accuracies (for quantification), or is there a tricky trade-off between data
usage, the instability of adjusting with slightly different quantifiers, and so on?
- argue that this method is only interesting when we have little data (the adjustment discards data),
and not when the classifier is a costly one (we require training at test time). Argue that the computational
burden can be transferred to the training stage, by training many LRs for different class_weight ratios and
then, at test time, using the one closest to the guessed prevalence.
- better investigate the "iterative" nature of the method.
- better investigate the implications with other learners, e.g., using EMQ as the prompter, or using EMQ in the
second stage (test).
- test with SVMs (not working well... and problematic because SVMs need to be calibrated)
- test in multiclass scenarios
"""
class ClassWeightPCC(BaseQuantifier):
def __init__(self, estimator=LogisticRegression, **pcc_param_grid):
self.estimator = estimator
self.learner = PACC(self.estimator())
if 'class_weight' in pcc_param_grid:
raise ValueError('parameter "class_weight" cannot be included in "pcc_param_grid"')
self.pcc_param_grid = dict(pcc_param_grid)
self.deployed = False
def fit(self, data: LabelledCollection, fit_learner=True):
self.train = data
self.learner.fit(self.train)
return self
def quantify(self, instances):
guessed_prevalence = self.learner.quantify(instances)
class_weight = self._get_class_weight(guessed_prevalence)
if self.pcc_param_grid and self.deployed:
"""If the param grid has been specified, then use it to find good hyper-parameters for the classifier.
In this case, we know (an approximation of) the target prevalence, so we might simply want to optimize
for classification (and not for quantification)"""
# pcc = PCC(GridSearchCV(LogisticRegression(class_weight=class_weight), param_grid=self.pcc_param_grid, n_jobs=-1))
pcc = PCC(LogisticRegressionCV(Cs=self.pcc_param_grid['C'], class_weight=class_weight, n_jobs=-1, cv=3))
raise ValueError('this cannot work...')
else:
"""If the param grid has not been specified, we take the best parameters found for the base quantifier"""
base_parameters = dict(self.learner.get_params())
for p,v in self.learner.get_params().items():
# this search allows quantifiers that wrap the learner in a CalibratedClassifierCV to work as well
if 'class_weight' in p:
base_parameters[p] = class_weight
break
base_estimator = clone(self.learner.learner)
base_estimator.set_params(**base_parameters)
pcc = PCC(base_estimator)
return pcc.fit(self.train).quantify(instances)
def _get_class_weight(self, prevalence):
# class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
# return {0: class_weight[1], 1: class_weight[0]}
# weights = prevalence/prevalence.min()
weights = prevalence / self.train.prevalence()
normfactor = weights.min()
if normfactor <= 0:
normfactor = 1E-3
weights /= normfactor
return {0:weights[0], 1:weights[1]}
def set_params(self, **parameters):
# parameters = {p:v for p,v in parameters.items()}
# print(parameters)
self.learner.set_params(**parameters)
def get_params(self, deep=True):
return self.learner.get_params()
@property
def classes_(self):
return self.train.classes_
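
A minimal usage sketch of ClassWeightPCC (assuming the quapy version these scripts target, and reusing the same loader and sampling calls that appear in the other files of this commit; the dataset and sample size are arbitrary choices for illustration):

import quapy as qp
from class_weight_model import ClassWeightPCC

data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10)
model = ClassWeightPCC()  # PACC guesses the test prevalence; PCC is then refit with a compensating class_weight
model.fit(data.training)
sample = data.test.sampling(500, 0.3, 0.7)  # draw a test sample with a shifted prevalence
print('true prevalence     :', sample.prevalence())
print('estimated prevalence:', model.quantify(sample.instances))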

NewMethods/common.py Normal file
View File

@@ -0,0 +1,97 @@
import pickle
import os
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import quapy as qp
def newLR():
return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
def calibratedLR():
return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1))
def save_results(result_dir, dataset_name, model_name, run, optim_loss, *results):
rpath = result_path(result_dir, dataset_name, model_name, run, optim_loss)
qp.util.create_parent_dir(rpath)
with open(rpath, 'wb') as foo:
pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
def evaluate_experiment(true_prevalences, estim_prevalences):
print('\nEvaluation Metrics:\n' + '=' * 22)
for eval_measure in [qp.error.mae, qp.error.mrae]:
err = eval_measure(true_prevalences, estim_prevalences)
print(f'\t{eval_measure.__name__}={err:.4f}')
print()
def result_path(path, dataset_name, model_name, run, optim_loss):
return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl')
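# e.g., result_path('./results_uci', 'yeast', 'wpacc.opt', 0, 'mae') -> './results_uci/yeast-wpacc.opt-run0-mae.pkl'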
def is_already_computed(result_dir, dataset_name, model_name, run, optim_loss):
return os.path.exists(result_path(result_dir, dataset_name, model_name, run, optim_loss))
nice = {
'pacc.opt': 'PACC(LR)',
'pacc.opt.svm': 'PACC(SVM)',
'pcc.opt': 'PCC(LR)',
'pcc.opt.svm': 'PCC(SVM)',
'wpacc.opt': 'R-PCC(LR)',
'wpacc.opt.svm': 'R-PCC(SVM)',
'mae':'AE',
'ae':'AE',
'svmkld': 'SVM(KLD)',
'svmnkld': 'SVM(NKLD)',
'svmq': 'SVM(Q)',
'svmae': 'SVM(AE)',
'svmmae': 'SVM(AE)',
'svmmrae': 'SVM(RAE)',
'hdy': 'HDy',
'sldc': 'SLD',
'X': 'TSX',
'T50': 'TS50',
'ehdymaeds': 'E(HDy)$_\mathrm{DS}$',
'Average': 'Average',
'EMdiag':'EM$_{diag}$', 'EMfull':'EM$_{full}$', 'EMtied':'EM$_{tied}$', 'EMspherical':'EM$_{sph}$',
'VEMdiag':'VEM$_{diag}$', 'VEMfull':'VEM$_{full}$', 'VEMtied':'VEM$_{tied}$', 'VEMspherical':'VEM$_{sph}$',
'epaccmaemae1k': 'E(PACC)$_\mathrm{AE}$',
'quanet': 'QuaNet'
}
def nicerm(key):
return '\mathrm{'+nice[key]+'}'
def nicename(method, eval_name=None, side=False):
m = nice.get(method, method.upper())
if eval_name is not None:
m = m.replace('$$','')
if side:
m = '\side{'+m+'}'
return m
def save_table(path, table):
print(f'saving results in {path}')
with open(path, 'wt') as foo:
foo.write(table)
def experiment_errors(path, dataset, method, run, eval_loss, optim_loss=None):
if optim_loss is None:
optim_loss = eval_loss
path = result_path(path, dataset, method, run, 'm' + optim_loss if not optim_loss.startswith('m') else optim_loss)
if os.path.exists(path):
true_prevs, estim_prevs, _, _, _ = pickle.load(open(path, 'rb'))
err_fn = getattr(qp.error, eval_loss)
errors = err_fn(true_prevs, estim_prevs)
return errors
return None

View File

@@ -0,0 +1,174 @@
from sklearn.calibration import CalibratedClassifierCV
import quapy as qp
from sklearn.linear_model import LogisticRegression
from class_weight_model import ClassWeightPCC
# from classification.methods import LowRankLogisticRegression
# from method.experimental import ExpMax, VarExpMax
from common import *
from method.meta import QuaNet
from quantification_stumps_model import QuantificationStumpRegressor
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil
SAMPLE_SIZE = 500
N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
__C_range = np.logspace(-3, 3, 7)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}
def quantification_models():
# yield 'cc', CC(newLR()), lr_params
# yield 'acc', ACC(newLR()), lr_params
# yield 'pcc', PCC(newLR()), None
# yield 'pacc', PACC(newLR()), None
# yield 'wpacc', ClassWeightPCC(), None
# yield 'pcc.opt', PCC(newLR()), lr_params
# yield 'pacc.opt', PACC(newLR()), lr_params
# yield 'wpacc.opt', ClassWeightPCC(), lr_params
yield 'ds', QuantificationStumpRegressor(SAMPLE_SIZE, 21, 10), None
# yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
# yield 'MAX', MAX(newLR()), lr_params
# yield 'MS', MS(newLR()), lr_params
# yield 'MS2', MS2(newLR()), lr_params
# yield 'sldc', EMQ(calibratedLR()), lr_params
# yield 'svmmae', SVMAE(), svmperf_params
# yield 'hdy', HDy(newLR()), lr_params
# yield 'EMdiag', ExpMax(cov_type='diag'), None
# yield 'EMfull', ExpMax(cov_type='full'), None
# yield 'EMtied', ExpMax(cov_type='tied'), None
# yield 'EMspherical', ExpMax(cov_type='spherical'), None
# yield 'VEMdiag', VarExpMax(cov_type='diag'), None
# yield 'VEMfull', VarExpMax(cov_type='full'), None
# yield 'VEMtied', VarExpMax(cov_type='tied'), None
# yield 'VEMspherical', VarExpMax(cov_type='spherical'), None
# def quantification_cuda_models():
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# print(f'Running QuaNet in {device}')
# learner = LowRankLogisticRegression(**newLR().get_params())
# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params
def quantification_ensembles():
param_mod_sel = {
'sample_size': SAMPLE_SIZE,
'n_prevpoints': 21,
'n_repetitions': 5,
'refit': True,
'verbose': False
}
common = {
'size': 30,
'red_size': 15,
'max_sample_size': None, # same as training set
'n_jobs': ENSEMBLE_N_JOBS,
'param_grid': lr_params,
'param_mod_sel': param_mod_sel,
'val_split': 0.4,
'min_pos': 5
}
# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
# will be skipped (by setting hyperparameters to None)
hyper_none = None
yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none
def run(experiment):
optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
if dataset_name == 'imdb':
return
data = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=5)
run=0
if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss):
print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
return
print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
# model selection (hyperparameter optimization for a quantification-oriented loss)
if hyperparams is not None:
model_selection = qp.model_selection.GridSearchQ(
model,
param_grid=hyperparams,
sample_size=SAMPLE_SIZE,
n_prevpoints=21,
n_repetitions=100,
error=optim_loss,
refit=True,
timeout=60 * 60,
verbose=True
)
model_selection.fit(data.training)
model = model_selection.best_model()
best_params = model_selection.best_params_
else:
model.fit(data.training)
best_params = {}
# model evaluation
true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
model,
test=data.test,
sample_size=SAMPLE_SIZE,
n_prevpoints=21, # 21
n_repetitions=10, # 100
n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1,
verbose=True
)
test_true_prevalence = data.test.prevalence()
evaluate_experiment(true_prevalences, estim_prevalences)
save_results(args.results, dataset_name, model_name, run, optim_loss,
true_prevalences, estim_prevalences,
data.training.prevalence(), test_true_prevalence,
best_params)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run experiments for Reviews Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str,
help='path to the directory where to store the results')
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
help='path to the directory with svmperf')
parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
help='path to the directory where to dump QuaNet checkpoints')
args = parser.parse_args()
print(f'Result folder: {args.results}')
np.random.seed(0)
qp.environ['SVMPERF_HOME'] = args.svmperfpath
optim_losses = ['mae']
datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS
models = quantification_models()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
# models = quantification_cuda_models()
# qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)
# models = quantification_ensembles()
# qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
shutil.rmtree(args.checkpointdir, ignore_errors=True)

View File

@@ -0,0 +1,70 @@
import quapy as qp
import numpy as np
from os import makedirs
import sys, os
import pickle
import argparse
from common import *
from reviews_experiments import *
from tabular import Table
import itertools
tables_path = './tables_reviews'
MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
makedirs(tables_path, exist_ok=True)
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
METHODS = ['cc', 'acc', 'pcc',
'pacc',
'wpacc',
# 'MAX', 'MS', 'MS2',
'sldc',
# 'svmmae',
# 'hdy',
# 'ehdymaeds',
# 'EMdiag', 'EMfull', 'EMtied', 'EMspherical',
# 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical',
]
if __name__ == '__main__':
results = 'results_reviews'
datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS
evaluation_measures = [qp.error.ae]
run=0
for i, eval_func in enumerate(evaluation_measures):
eval_name = eval_func.__name__
# Tables evaluation scores for the evaluation measure
# ----------------------------------------------------
# fill data table
table = Table(benchmarks=datasets, methods=METHODS)
for dataset, method in itertools.product(datasets, METHODS):
table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name))
# write the latex table
nmethods = len(METHODS)
tabular = """
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline
& \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline
"""
rowreplace={dataset: nicename(dataset) for dataset in datasets}
colreplace={method: nicename(method, eval_name, side=True) for method in METHODS}
tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
tabular += 'Rank Average & ' + table.getRankTable().latexAverage()
tabular += """
\end{tabular}%
}
"""
save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular)
print("[Done]")

NewMethods/tabular.py Normal file
View File

@@ -0,0 +1,321 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon
class Table:
VALID_TESTS = [None, "wilcoxon", "ttest"]
def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
color=True):
assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
self.benchmarks = np.asarray(benchmarks)
self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}
self.methods = np.asarray(methods)
self.method_index = {col: j for j, col in enumerate(methods)}
self.map = {}
# keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
self._addmap('values', dtype=object)
self.lower_is_better = lower_is_better
self.ttest = ttest
self.prec_mean = prec_mean
self.clean_zero = clean_zero
self.show_std = show_std
self.prec_std = prec_std
self.add_average = average
self.missing = missing
self.missing_str = missing_str
self.color = color
self.touch()
@property
def nbenchmarks(self):
return len(self.benchmarks)
@property
def nmethods(self):
return len(self.methods)
def touch(self):
self._modif = True
def update(self):
if self._modif:
self.compute()
def _getfilled(self):
return np.argwhere(self.map['fill'])
@property
def values(self):
return self.map['values']
def _indexes(self):
return itertools.product(range(self.nbenchmarks), range(self.nmethods))
def _addmap(self, map, dtype, func=None):
self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
if func is None:
return
m = self.map[map]
f = func
indexes = self._indexes() if map == 'fill' else self._getfilled()
for i, j in indexes:
m[i, j] = f(self.values[i, j])
def _addrank(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
if not self.lower_is_better:
ranked_cols_idx = ranked_cols_idx[::-1]
self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)
def _addcolor(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if filled_cols_idx.size == 0:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
minval = min(col_means)
maxval = max(col_means)
for col_idx in filled_cols_idx:
val = self.map['mean'][i, col_idx]
norm = (maxval - minval)
if norm > 0:
normval = (val - minval) / norm
else:
normval = 0.5
if self.lower_is_better:
normval = 1 - normval
self.map['color'][i, col_idx] = color_red2green_01(normval)
def _run_ttest(self, row, col1, col2):
mean1 = self.map['mean'][row, col1]
std1 = self.map['std'][row, col1]
nobs1 = self.map['nobs'][row, col1]
mean2 = self.map['mean'][row, col2]
std2 = self.map['std'][row, col2]
nobs2 = self.map['nobs'][row, col2]
_, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
return p_val
def _run_wilcoxon(self, row, col1, col2):
values1 = self.map['values'][row, col1]
values2 = self.map['values'][row, col2]
_, p_val = wilcoxon(values1, values2)
return p_val
def _add_statistical_test(self):
if self.ttest is None:
return
self.some_similar = [False] * self.nmethods
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if len(filled_cols_idx) <= 1:
continue
col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
best_pos = filled_cols_idx[np.argmin(col_means)]
for j in filled_cols_idx:
if j == best_pos:
continue
if self.ttest == 'ttest':
p_val = self._run_ttest(i, best_pos, j)
else:
p_val = self._run_wilcoxon(i, best_pos, j)
pval_outcome = pval_interpretation(p_val)
self.map['ttest'][i, j] = pval_outcome
if pval_outcome != 'Diff':
self.some_similar[j] = True
def compute(self):
self._addmap('fill', dtype=bool, func=lambda x: x is not None)
self._addmap('mean', dtype=float, func=np.mean)
self._addmap('std', dtype=float, func=np.std)
self._addmap('nobs', dtype=float, func=len)
self._addmap('rank', dtype=int, func=None)
self._addmap('color', dtype=object, func=None)
self._addmap('ttest', dtype=object, func=None)
self._addmap('latex', dtype=object, func=None)
self._addrank()
self._addcolor()
self._add_statistical_test()
if self.add_average:
self._addave()
self._modif = False
def _is_column_full(self, col):
return all(self.map['fill'][:, self.method_index[col]])
def _addave(self):
ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, ttest=self.ttest, average=False,
missing=self.missing, missing_str=self.missing_str)
for col in self.methods:
values = None
if self._is_column_full(col):
if self.ttest == 'ttest':
values = np.asarray(self.map['mean'][:, self.method_index[col]])
else: # wilcoxon
values = np.concatenate(self.values[:, self.method_index[col]])
ave.add('ave', col, values)
self.average = ave
def add(self, benchmark, method, values):
if values is not None:
values = np.asarray(values)
if values.ndim == 0:
values = values.flatten()
rid, cid = self._coordinates(benchmark, method)
if self.map['values'][rid, cid] is None:
self.map['values'][rid, cid] = values
elif values is not None:
self.map['values'][rid, cid] = np.concatenate([self.map['values'][rid, cid], values])
self.touch()
def get(self, benchmark, method, attr='mean'):
self.update()
assert attr in self.map, f'unknown attribute {attr}'
rid, cid = self._coordinates(benchmark, method)
if self.map['fill'][rid, cid]:
v = self.map[attr][rid, cid]
if v is None or (isinstance(v, float) and np.isnan(v)):
return self.missing
return v
else:
return self.missing
def _coordinates(self, benchmark, method):
assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
assert method in self.method_index, f'method {method} out of range'
rid = self.benchmark_index[benchmark]
cid = self.method_index[method]
return rid, cid
def get_average(self, method, attr='mean'):
self.update()
if self.add_average:
return self.average.get('ave', method, attr=attr)
return None
def get_color(self, benchmark, method):
color = self.get(benchmark, method, attr='color')
if color is None:
return ''
return color
def latex(self, benchmark, method):
self.update()
i, j = self._coordinates(benchmark, method)
if self.map['fill'][i, j] == False:
return self.missing_str
mean = self.map['mean'][i, j]
l = f" {mean:.{self.prec_mean}f}"
if self.clean_zero:
l = l.replace(' 0.', '.')
isbest = self.map['rank'][i, j] == 1
if isbest:
l = "\\textbf{" + l.strip() + "}"
stat = ''
if self.ttest is not None and self.some_similar[j]:
test_label = self.map['ttest'][i, j]
if test_label == 'Sim':
stat = '^{\dag\phantom{\dag}}'
elif test_label == 'Same':
stat = '^{\ddag}'
elif isbest or test_label == 'Diff':
stat = '^{\phantom{\ddag}}'
std = ''
if self.show_std:
std = self.map['std'][i, j]
std = f" {std:.{self.prec_std}f}"
if self.clean_zero:
std = std.replace(' 0.', '.')
std = f" \pm {std:{self.prec_std}}"
if stat != '' or std != '':
l = f'{l}${stat}{std}$'
if self.color:
l += ' ' + self.map['color'][i, j]
return l
def latexTabular(self, benchmark_replace={}, method_replace={}, average=True):
tab = ' & '
tab += ' & '.join([method_replace.get(col, col) for col in self.methods])
tab += ' \\\\\hline\n'
for row in self.benchmarks:
rowname = benchmark_replace.get(row, row)
tab += rowname + ' & '
tab += self.latexRow(row)
if average:
tab += '\hline\n'
tab += 'Average & '
tab += self.latexAverage()
return tab
def latexRow(self, benchmark, endl='\\\\\hline\n'):
s = [self.latex(benchmark, col) for col in self.methods]
s = ' & '.join(s)
s += ' ' + endl
return s
def latexAverage(self, endl='\\\\\hline\n'):
if self.add_average:
return self.average.latexRow('ave', endl=endl)
def getRankTable(self):
t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
for rid, cid in self._getfilled():
row = self.benchmarks[rid]
col = self.methods[cid]
t.add(row, col, self.get(row, col, 'rank'))
t.compute()
return t
def dropMethods(self, methods):
drop_index = [self.method_index[m] for m in methods]
new_methods = np.delete(self.methods, drop_index)
new_index = {col: j for j, col in enumerate(new_methods)}
self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
self.methods = new_methods
self.method_index = new_index
self.touch()
def pval_interpretation(p_val):
if 0.005 >= p_val:
return 'Diff'
elif 0.05 >= p_val > 0.005:
return 'Sim'
elif p_val > 0.05:
return 'Same'
def color_red2green_01(val, maxtone=50):
if np.isnan(val): return None
assert 0 <= val <= 1, f'val {val} out of range [0,1]'
# rescale to [-1,1]
val = val * 2 - 1
if val < 0:
color = 'red'
tone = maxtone * (-val)
else:
color = 'green'
tone = maxtone * val
return '\cellcolor{' + color + f'!{int(tone)}' + '}'
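
A minimal sketch of how Table is meant to be driven (the same add / latexTabular / getRankTable calls used by the table scripts in this commit), with random numbers standing in for per-sample error scores:

import numpy as np
from tabular import Table

np.random.seed(0)
benchmarks, methods = ['dataset-a', 'dataset-b'], ['pacc.opt', 'wpacc.opt']
table = Table(benchmarks=benchmarks, methods=methods)
for b in benchmarks:
    for m in methods:
        table.add(b, m, np.random.rand(25))  # 25 mock AE scores for the (dataset, method) cell
print(table.latexTabular())  # one row per benchmark: mean per cell, best in bold, cell colors, ttest markers
print('Rank Average & ' + table.getRankTable().latexAverage())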

View File

@@ -0,0 +1,173 @@
from sklearn.svm import LinearSVC
from class_weight_model import ClassWeightPCC
# from classification.methods import LowRankLogisticRegression
# from method.experimental import ExpMax, VarExpMax
from common import *
from method.meta import QuaNet
from quantification_stumps_model import QuantificationStumpRegressor
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil
SAMPLE_SIZE = 100
N_FOLDS = 5
N_REPEATS = 1
N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
__C_range = np.logspace(-3, 3, 7)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}
def quantification_models():
# yield 'cc', CC(newLR()), lr_params
# yield 'acc', ACC(newLR()), lr_params
yield 'pcc.opt', PCC(newLR()), lr_params
yield 'pacc.opt', PACC(newLR()), lr_params
yield 'wpacc.opt', ClassWeightPCC(), lr_params
yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
# yield 'pcc.opt.svm', PCC(LinearSVC()), lr_params
# yield 'pacc.opt.svm', PACC(LinearSVC()), lr_params
# yield 'wpacc.opt.svm', ClassWeightPCC(LinearSVC), lr_params
# yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params # this cannot work in its current version (see notes in the class_weight_model.py file)
# yield 'MAX', MAX(newLR()), lr_params
# yield 'MS', MS(newLR()), lr_params
# yield 'MS2', MS2(newLR()), lr_params
yield 'sldc', EMQ(calibratedLR()), lr_params
# yield 'svmmae', SVMAE(), svmperf_params
# yield 'hdy', HDy(newLR()), lr_params
# yield 'EMdiag', ExpMax(cov_type='diag'), None
# yield 'EMfull', ExpMax(cov_type='full'), None
# yield 'EMtied', ExpMax(cov_type='tied'), None
# yield 'EMspherical', ExpMax(cov_type='spherical'), None
# yield 'VEMdiag', VarExpMax(cov_type='diag'), None
# yield 'VEMfull', VarExpMax(cov_type='full'), None
# yield 'VEMtied', VarExpMax(cov_type='tied'), None
# yield 'VEMspherical', VarExpMax(cov_type='spherical'), None
# def quantification_cuda_models():
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# print(f'Running QuaNet in {device}')
# learner = LowRankLogisticRegression(**newLR().get_params())
# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params
# def quantification_ensembles():
# param_mod_sel = {
# 'sample_size': SAMPLE_SIZE,
# 'n_prevpoints': 21,
# 'n_repetitions': 5,
# 'refit': True,
# 'verbose': False
# }
# common = {
# 'size': 30,
# 'red_size': 15,
# 'max_sample_size': None, # same as training set
# 'n_jobs': ENSEMBLE_N_JOBS,
# 'param_grid': lr_params,
# 'param_mod_sel': param_mod_sel,
# 'val_split': 0.4,
# 'min_pos': 5
# }
#
# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
# will be skipped (by setting hyperparameters to None)
# hyper_none = None
# yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none
def run(experiment):
optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return
collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=N_FOLDS, nrepeats=N_REPEATS)):
if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss):
print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
continue
print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
# model selection (hyperparameter optimization for a quantification-oriented loss)
if hyperparams is not None:
model_selection = qp.model_selection.GridSearchQ(
model,
param_grid=hyperparams,
sample_size=SAMPLE_SIZE,
n_prevpoints=21,
n_repetitions=25,
error=optim_loss,
refit=True,
timeout=60 * 60,
verbose=True
)
model_selection.fit(data.training)
model = model_selection.best_model()
best_params = model_selection.best_params_
else:
model.fit(data.training)
best_params = {}
# model evaluation
true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
model,
test=data.test,
sample_size=SAMPLE_SIZE,
n_prevpoints=21,
n_repetitions=100,
n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
)
test_true_prevalence = data.test.prevalence()
evaluate_experiment(true_prevalences, estim_prevalences)
save_results(args.results, dataset_name, model_name, run, optim_loss,
true_prevalences, estim_prevalences,
data.training.prevalence(), test_true_prevalence,
best_params)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run experiments for UCI ML Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str,
help='path to the directory where to store the results')
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
help='path to the directory with svmperf')
parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
help='path to the directory where to dump QuaNet checkpoints')
args = parser.parse_args()
print(f'Result folder: {args.results}')
np.random.seed(0)
qp.environ['SVMPERF_HOME'] = args.svmperfpath
optim_losses = ['mae']
datasets = qp.datasets.UCI_DATASETS
models = quantification_models()
# for runargs in itertools.product(optim_losses, datasets, models):
# run(runargs)
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)
# models = quantification_cuda_models()
# qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)
# models = quantification_ensembles()
# qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
shutil.rmtree(args.checkpointdir, ignore_errors=True)

NewMethods/uci_plots.py Normal file
View File

@@ -0,0 +1,100 @@
import quapy as qp
import os
import pathlib
import pickle
from glob import glob
import sys
from plot_driftbox import brokenbar_supremacy_by_drift
from uci_experiments import *
from uci_tables import METHODS
from os.path import join
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
plotext='png'
resultdir = './results_uci'
plotdir = './plots_uci'
os.makedirs(plotdir, exist_ok=True)
N_RUNS = N_FOLDS * N_REPEATS
def gather_results(methods, error_name, resultdir):
method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
for method in methods:
for run in range(N_RUNS):
for experiment in glob(f'{resultdir}/*-{method}-run{run}-m{error_name}.pkl'):
true_prevalences, estim_prevalences, tr_prev, te_prev, best_params = pickle.load(open(experiment, 'rb'))
method_names.append(nicename(method))
true_prevs.append(true_prevalences)
estim_prevs.append(estim_prevalences)
tr_prevs.append(tr_prev)
return method_names, true_prevs, estim_prevs, tr_prevs
def plot_error_by_drift(methods, error_name, logscale=False, path=None):
print('plotting error by drift')
if path is not None:
path = join(path, f'error_by_drift_{error_name}.{plotext}')
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
qp.plot.error_by_drift(
method_names,
true_prevs,
estim_prevs,
tr_prevs,
n_bins=20,
error_name=error_name,
show_std=True,
logscale=logscale,
title=f'Quantification error as a function of distribution shift',
savepath=path
)
def diagonal_plot(methods, error_name, path=None):
print('plotting diagonal plots')
if path is not None:
path = join(path, f'diag_{error_name}')
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, show_std=True, savepath=f'{path}_pos.{plotext}')
def binary_bias_global(methods, error_name, path=None):
print('plotting bias global')
if path is not None:
path = join(path, f'globalbias_{error_name}')
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', savepath=f'{path}_pos.{plotext}')
def binary_bias_bins(methods, error_name, path=None):
print('plotting bias local')
if path is not None:
path = join(path, f'localbias_{error_name}')
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}')
def brokenbar_supr(methods, error_name, path=None):
print('plotting brokenbar_supr')
if path is not None:
path = join(path, f'broken_{error_name}')
method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, binning='isometric',
x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005,
savepath=path)
if __name__ == '__main__':
# plot_error_by_drift(METHODS, error_name='ae', path=plotdir)
# diagonal_plot(METHODS, error_name='ae', path=plotdir)
# binary_bias_global(METHODS, error_name='ae', path=plotdir)
# binary_bias_bins(METHODS, error_name='ae', path=plotdir)
# brokenbar_supr(METHODS, error_name='ae', path=plotdir)
brokenbar_supr(METHODS, error_name='ae', path=plotdir)

NewMethods/uci_tables.py Normal file
View File

@@ -0,0 +1,81 @@
import quapy as qp
import numpy as np
from os import makedirs
import sys, os
import pickle
import argparse
from common import *
from uci_experiments import result_path
from tabular import Table
from uci_experiments import *
import itertools
tables_path = './tables_uci'
MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
makedirs(tables_path, exist_ok=True)
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
METHODS = [#'cc', 'acc',
# 'pcc',
# 'pacc',
# 'wpacc',
'pcc.opt',
'pacc.opt',
'wpacc.opt',
'ds.opt',
# 'pcc.opt.svm',
# 'pacc.opt.svm',
# 'wpacc.opt.svm',
# 'wpacc.opt2',
# 'MAX', 'MS', 'MS2',
'sldc',
# 'svmmae',
# 'hdy',
# 'ehdymaeds',
# 'EMdiag', 'EMfull', 'EMtied', 'EMspherical',
# 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical',
]
if __name__ == '__main__':
results = 'results_uci'
datasets = qp.datasets.UCI_DATASETS
datasets.remove('acute.a')
datasets.remove('acute.b')
datasets.remove('iris.1')
evaluation_measures = [qp.error.ae, qp.error.rae, qp.error.kld]
for i, eval_func in enumerate(evaluation_measures):
eval_name = eval_func.__name__
# Tables evaluation scores for the evaluation measure
# ----------------------------------------------------
# fill data table
table = Table(benchmarks=datasets, methods=METHODS)
for dataset, method, run in itertools.product(datasets, METHODS, range(N_FOLDS*N_REPEATS)):
table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name, optim_loss='ae'))
# write the latex table
nmethods = len(METHODS)
tabular = """
\\resizebox{\\textwidth}{!}{%
\\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline
& \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline
"""
rowreplace={dataset: nicename(dataset) for dataset in datasets}
colreplace={method: nicename(method, eval_name, side=True) for method in METHODS}
tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
tabular += 'Rank Average & ' + table.getRankTable().latexAverage()
tabular += """
\end{tabular}%
}
"""
save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular)
print("[Done]")

View File

@@ -333,6 +333,7 @@ class Dataset:
yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
def isbinary(data):
if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
return data.binary

View File

@@ -228,10 +228,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
if show_std:
ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)
# xs = bins[:-1]
# ys = inds_histogram_global
# print(xs.shape, ys.shape)
# ax.errorbar(xs, ys, label='density')
xs = bins[:-1]
ys = inds_histogram_global
print(xs.shape, ys.shape)
ax.errorbar(xs, ys, label='density')
ax.set(xlabel=f'Distribution shift between training set and test sample',
ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',

View File

@@ -41,9 +41,9 @@ def parallel(func, args, n_jobs):
)
that takes the quapy.environ variable as input silently
"""
def func_dec(environ, *args):
def func_dec(environ, *args_i):
qp.environ = environ
return func(*args)
return func(*args_i)
return Parallel(n_jobs=n_jobs)(
delayed(func_dec)(qp.environ, args_i) for args_i in args
)