forked from moreo/QuaPy

Compare commits: master...classweigh (8 commits)

SHA1
785533f74a
027e18f1e7
1ae45f8b9f
06d36a132d
27124d0d00
ee007bd0d5
970008c9f7
3df55f3613
@@ -0,0 +1,212 @@
import numpy as np
import matplotlib.pyplot as plt
import sklearn.preprocessing
from matplotlib import cm
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import normalize

import quapy as qp
import quapy.functional as F
from quapy.data import LabelledCollection
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ
import os
from scipy.stats import ttest_rel


"""
The idea of this method is to make a first guess of the test class distribution (maybe with PACC) and then
train a method without adjustment (maybe PCC), setting the class_weight param in such a way that it best compensates
for the positive and negative contributions wrt the guessed distribution. The method can be iterative, though I
have not seen any major improvements (if at all) in doing more than 1 iteration.
This file is the proof of concept with artificial data and nice plots. The quantifier is implemented in file
class_weight_model.py.
So far, it looks like it works for artificial datasets; for UCI (without model selection for now) it works better
than PACC. For reviews it does not improve over PACC though.
"""

x_min, x_max = 0, 11
y_min, y_max = 0, x_max
center0 = (2*x_max/5, 2*x_max/5)
center1 = (3*x_max/5, 3*x_max/5)

X, Y = make_blobs(n_samples=[100000, 100000], n_features=2, centers=[center0, center1])


data = LabelledCollection(X, Y)

train_pool, test_pool = data.split_stratified(train_prop=0.5)


def plot(fignum, title, savepath=None):
    clf = q.learner

    # get the separating hyperplane
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(0, x_max)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    wref = reference_hyperplane.coef_[0]
    aref = -wref[0] / wref[1]

    YY, XX = np.meshgrid(yy, xx)
    xy = np.vstack([XX.ravel(), YY.ravel()]).T
    # Z = clf.decision_function(xy).reshape(XX.shape)
    # Z2 = reference_hyperplane.decision_function(xy).reshape(XX.shape)

    # plot the line and the points
    plt.figure(fignum + 1, figsize=(10, 10))
    plt.clf()
    plt.plot(xx, yy, "k-")

    Xte, yte = test.Xy
    # plt.scatter(Xte[:, 0], Xte[:, 1], c=test.labels, zorder=10, cmap=cm.get_cmap("RdBu"), alpha=0.4)
    cmap = cm.get_cmap("RdBu")
    plt.scatter(Xte[yte==0][:, 0], Xte[yte==0][:, 1], color=cmap(0), zorder=10, alpha=0.4, label='-')
    plt.scatter(Xte[yte==1][:, 0], Xte[yte==1][:, 1], color=cmap(cmap.N-1), zorder=10, alpha=0.4, label='+')

    plt.axis("tight")

    # Put the result into a contour plot
    # plt.contourf(XX, YY, Z, cmap=cm.get_cmap("RdBu"), alpha=0.6, levels=50, linestyles=None)

    plt.plot(xx, a * xx - (clf.intercept_[0]) / w[1], 'k-', label='modified')
    plt.plot(xx, aref * xx - (reference_hyperplane.intercept_[0]) / wref[1], 'k--', label='original')

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    plt.xticks(())
    plt.yticks(())

    plt.title(title)
    plt.legend()

    if savepath:
        plt.savefig(savepath)


def mock_y(prev):
    n = 10000
    nneg = int(n * prev[0])
    npos = int(n * prev[1])
    mock = np.asarray([0]*nneg + [1]*npos, dtype=int)
    return mock


def get_class_weight(prevalence):
    # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
    # return {0: class_weight[1], 1: class_weight[0]}
    # weights = prevalence/prevalence.min()
    weights = prevalence / train.prevalence()
    normfactor = weights.min()
    if normfactor <= 0:
        normfactor = 1E-3
    weights /= normfactor
    return {0: weights[0], 1: weights[1]}


def train_eval(class_weight, test):
    q = Method(LogisticRegression(class_weight=class_weight))
    q.fit(train)

    prev_estim = q.quantify(test.instances)
    true_prev = test.prevalence()
    ae = qp.error.ae(true_prev, prev_estim)

    return q, prev_estim, ae


probabilistic = True

Prompter = PACC  # the method creating the very first guess
Baseline = PACC if probabilistic else ACC
bname = Baseline.__name__

Method = PCC if probabilistic else CC
mname = Method.__name__

plotdir = f'./plots/{mname}_vs_{bname}'
os.makedirs(plotdir, exist_ok=True)

test_prevs = np.linspace(0, 1, 20)
train_prevs = np.linspace(0.05, 0.95, 20)

fignum = 0
wins, total = 0, 0
merrors = []
berrors = []

for ptr in train_prevs:
    train = train_pool.sampling(10000, ptr)

    reference_hyperplane = LogisticRegression().fit(*train.Xy)
    baseline = Baseline(LogisticRegression()).fit(train)
    if Baseline != Prompter:
        prompter = Prompter(LogisticRegression()).fit(train)
    else:
        prompter = baseline

    for pte in test_prevs:
        test = test_pool.sampling(10000, pte)

        # some baseline results
        prev_estim_acc = baseline.quantify(test.instances)
        ae_baseline = qp.error.ae(test.prevalence(), prev_estim_acc)
        berrors.append(ae_baseline)

        # guessed_prevalence = train.prevalence()
        guessed_prevalence = prompter.quantify(test.instances)

        niter = 10
        last_prev = None
        for i in range(niter):
            class_weight = get_class_weight(guessed_prevalence)

            q, prev_estim, ae = train_eval(class_weight, test)

            stop = (i == niter-1) or (last_prev is not None and qp.error.ae(prev_estim, last_prev) < 0.001)
            if stop:
                merrors.append(ae)
                win = ae < ae_baseline
                if win: wins += 1

                print(f'{i}: tr_prev={F.strprev(train.prevalence())} te_prev={F.strprev(test.prevalence())}, {mname}+ estim_prev={F.strprev(prev_estim)} AE={ae:.5f} '
                      f'using class_weight [{class_weight[0]:.3f}, {class_weight[1]:.3f}] '
                      f'({bname} prev={F.strprev(prev_estim_acc)} AE={ae_baseline:.5f}) '
                      f'{"WIN" if win else "LOSE"}')
                break
            else:
                last_prev = prev_estim

            # title = '$\hat{{p}}^{{{}}}={:.3f}$, $p={:.3f}$, $\hat{{p}}={:.3f}$, AE$_{{{}}}={:.3f}$, AE$_{{{}}}={:.3f}$'.format(
            #     i, guessed_prevalence[0], pte, prev_estim[0], mname, ae, bname, ae_baseline
            # )
            # savepath = os.path.join(plotdir, f'tr_{ptr}_te{pte}_{i}.png')
            # plot(fignum, title, savepath)

            fignum += 1

            guessed_prevalence = prev_estim
        total += 1


merrors = np.asarray(merrors)
berrors = np.asarray(berrors)
mean_merrors = merrors.mean()
mean_berrors = berrors.mean()

print(f'WINS={wins}/{total}={100*wins/total:.2f}%')

_, p_val = ttest_rel(merrors, berrors)
print(f'{mname}-ave={mean_merrors:.5f} {bname}-ave={mean_berrors:.5f}')
print(f'ttest p-value={p_val:5f} significant={p_val<0.05}')
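The reweighting step at the heart of this script is easiest to follow on concrete numbers. Below is a self-contained sketch of what get_class_weight computes; the prevalence values are made up for illustration, not taken from any experiment:

import numpy as np

train_prevalence = np.asarray([0.7, 0.3])   # class distribution seen during training
guessed_test_prev = np.asarray([0.4, 0.6])  # first guess of the test distribution (e.g., from PACC)

weights = guessed_test_prev / train_prevalence  # per-class correction ratios: [0.571..., 2.0]
weights /= weights.min()                        # normalize so that the smallest weight is 1
class_weight = {0: weights[0], 1: weights[1]}
print(class_weight)  # {0: 1.0, 1: 3.5}: positive examples weigh 3.5x more when retraining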
@@ -0,0 +1,87 @@
from sklearn.base import BaseEstimator
import numpy as np
import quapy as qp
import quapy.functional as F
from data import LabelledCollection
from method.aggregative import ACC
from method.base import BaseQuantifier
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt


data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10)


class DecisionStump(BaseEstimator):
    def __init__(self, feat_id):
        self.feat_id = feat_id
        self.classes_ = np.asarray([0, 1], dtype=int)

    def fit(self, X, y):
        return self

    def predict(self, X):
        return (X[:, self.feat_id].toarray().flatten() > 0).astype(int)


class QuantificationStump(BaseQuantifier):
    def __init__(self, feat_id):
        self.feat_id = feat_id

    def fit(self, data: LabelledCollection):
        self.qs = ACC(DecisionStump(self.feat_id))
        self.qs.fit(data, fit_learner=False, val_split=data)
        self.classes = data.classes_
        return self

    def quantify(self, instances):
        return self.qs.quantify(instances)

    def set_params(self, **parameters):
        raise NotImplementedError()

    def get_params(self, deep=True):
        raise NotImplementedError()

    @property
    def classes_(self):
        return self.classes


train, dev = data.training.split_stratified()
test = data.test.sampling(1000, 0.3, 0.7)

print(f'test prevalence = {F.strprev(test.prevalence())}')

nF = train.instances.shape[1]

qs_scores = []
qs = np.asarray([QuantificationStump(i).fit(train) for i in tqdm(range(nF))])
scores = np.zeros(shape=(nF, 11*5))
for j, dev_sample in tqdm(enumerate(dev.artificial_sampling_generator(500, n_prevalences=11, repeats=5)), total=11*5):
    sample_prev = dev_sample.prevalence()
    for i, qs_i in enumerate(qs):
        estim_prev = qs_i.quantify(dev_sample.instances)
        error = qp.error.ae(sample_prev, estim_prev)
        scores[i, j] = error

k = 250
scores = scores.mean(axis=1)
order = np.argsort(scores)
qs = qs[order][:k]

prevs = np.asarray([qs_i.quantify(test.instances)[1] for qs_i in tqdm(qs)])

print(f'test estimation mean {prevs.mean():.3f}, median = {np.median(prevs)}')

# sns.histplot(data=prevs, binwidth=3)
# An "interface" to matplotlib.axes.Axes.hist() method
# n, bins, patches = plt.hist(x=prevs, bins='auto', alpha=0.7)
# plt.grid(axis='y', alpha=0.75)
# plt.xlabel('Value')
# plt.ylabel('Frequency')
# plt.title('My Very Own Histogram')
# maxfreq = n.max()
# Set a clean upper y-axis limit.
# plt.ylim(ymax=np.ceil(maxfreq / 10) * 10 if maxfreq % 10 else maxfreq + 10)
# plt.show()
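Each stump above is a very weak classifier (it fires whenever a single tf-idf feature is non-zero); what makes it usable as a quantifier is the ACC adjustment, which in the binary case corrects the raw classify-and-count estimate using the stump's true and false positive rates. A minimal standalone sketch of that correction, with made-up rates rather than QuaPy's ACC implementation:

import numpy as np

def acc_adjust(cc_estimate, tpr, fpr):
    # binary ACC correction: p = (cc - fpr) / (tpr - fpr), clipped to [0, 1]
    adjusted = (cc_estimate - fpr) / (tpr - fpr)
    return float(np.clip(adjusted, 0., 1.))

# a stump that fires on 30% of negatives (fpr) and 80% of positives (tpr),
# and predicts "positive" for 55% of the test sample (classify & count):
print(acc_adjust(0.55, tpr=0.8, fpr=0.3))  # 0.5 -> the adjusted prevalence estimate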
@@ -0,0 +1,94 @@
from sklearn import clone
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
import numpy as np
from sklearn.model_selection import GridSearchCV

import quapy as qp
from data import LabelledCollection
from method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC


"""
Possible extensions:
    - add CC and ClassWeightCC
    - understand how to optimize hyper-parameters for the final PCC quantifier. It is not trivial, since once
        class_weight has been set, the C parameter plays a secondary role. The reason is that I seriously doubt that
        the cross-validation is taking into account the fact that one class might be more important than the other,
        and so the best C parameter for quantifying, conditioned on this class prevalence, has nothing to do with the
        best C for classifying the current data... unless I define an evaluation metric weighting for each class
        weight, but this is very tricky (it is like implementing the "adjustment" in the evaluation metric...)
    - might be worth investigating more deeply the role of CV, and val_split, in ACC/PACC. Is it something that
        consistently delivers improved accuracies (for quantification) or is there a tricky trade-off between the
        data usage, the instability due to adjusting for slightly different quantifiers, and so on?
    - argue that this method is only interesting in cases in which we have few data (adjustment discards data),
        and not when the classifier is a costly one (we require training during test). Argue that the computational
        burden can be transferred to the training stage, by training many LRs for different class_weight ratios, and
        then using the one most similar to the guessed prevalence during test.
    - better investigate the "iterative" nature of the method.
    - better investigate the implications with other learners. E.g., using EMQ as a prompt, or using EMQ in the
        second stage (test).
    - test with SVM (not working well... and problematic due to the fact that SVMs need to be calibrated)
    - test in multiclass scenarios
"""


class ClassWeightPCC(BaseQuantifier):

    def __init__(self, estimator=LogisticRegression, **pcc_param_grid):
        self.estimator = estimator
        self.learner = PACC(self.estimator())
        if 'class_weight' in pcc_param_grid:
            raise ValueError('parameter "class_weight" cannot be included in "pcc_param_grid"')
        self.pcc_param_grid = dict(pcc_param_grid)
        self.deployed = False

    def fit(self, data: LabelledCollection, fit_learner=True):
        self.train = data
        self.learner.fit(self.train)
        return self

    def quantify(self, instances):
        guessed_prevalence = self.learner.quantify(instances)
        class_weight = self._get_class_weight(guessed_prevalence)
        if self.pcc_param_grid and self.deployed:
            """If the param grid has been specified, then use it to find good hyper-parameters for the classifier.
            In this case, we know (an approximation of) the target prevalence, so we might simply want to optimize
            for classification (and not for quantification)"""
            # pcc = PCC(GridSearchCV(LogisticRegression(class_weight=class_weight), param_grid=self.pcc_param_grid, n_jobs=-1))
            pcc = PCC(LogisticRegressionCV(Cs=self.pcc_param_grid['C'], class_weight=class_weight, n_jobs=-1, cv=3))
            raise ValueError('this cannot work...')
        else:
            """If the param grid has not been specified, we take the best parameters found for the base quantifier"""
            base_parameters = dict(self.learner.get_params())
            for p, v in self.learner.get_params().items():
                # this search allows quantifiers that work with a CalibratedClassifierCV to work as well
                if 'class_weight' in p:
                    base_parameters[p] = class_weight
                    break
            base_estimator = clone(self.learner.learner)
            base_estimator.set_params(**base_parameters)
            pcc = PCC(base_estimator)
        return pcc.fit(self.train).quantify(instances)

    def _get_class_weight(self, prevalence):
        # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence))
        # return {0: class_weight[1], 1: class_weight[0]}
        # weights = prevalence/prevalence.min()
        weights = prevalence / self.train.prevalence()
        normfactor = weights.min()
        if normfactor <= 0:
            normfactor = 1E-3
        weights /= normfactor
        return {0: weights[0], 1: weights[1]}

    def set_params(self, **parameters):
        self.learner.set_params(**parameters)

    def get_params(self, deep=True):
        return self.learner.get_params()

    @property
    def classes_(self):
        return self.train.classes_
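A minimal usage sketch of the ClassWeightPCC quantifier defined above, assuming QuaPy's standard dataset-fetching and sampling API; the dataset name and sample parameters are illustrative:

import quapy as qp

# illustrative dataset; any binary LabelledCollection would do
dataset = qp.datasets.fetch_UCIDataset('wdbc')
train, test = dataset.training, dataset.test

quantifier = ClassWeightPCC()  # PACC makes the first guess, then a reweighted PCC refines it
quantifier.fit(train)

sample = test.sampling(100, 0.2, 0.8)  # a test sample with a shifted class distribution
print(quantifier.quantify(sample.instances))
print(sample.prevalence())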
@@ -0,0 +1,97 @@
import pickle
import os
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression

import quapy as qp


def newLR():
    return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)


def calibratedLR():
    return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1))


def save_results(result_dir, dataset_name, model_name, run, optim_loss, *results):
    rpath = result_path(result_dir, dataset_name, model_name, run, optim_loss)
    qp.util.create_parent_dir(rpath)
    with open(rpath, 'wb') as foo:
        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)


def evaluate_experiment(true_prevalences, estim_prevalences):
    print('\nEvaluation Metrics:\n' + '=' * 22)
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prevalences, estim_prevalences)
        print(f'\t{eval_measure.__name__}={err:.4f}')
    print()


def result_path(path, dataset_name, model_name, run, optim_loss):
    return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl')


def is_already_computed(result_dir, dataset_name, model_name, run, optim_loss):
    return os.path.exists(result_path(result_dir, dataset_name, model_name, run, optim_loss))


nice = {
    'pacc.opt': 'PACC(LR)',
    'pacc.opt.svm': 'PACC(SVM)',
    'pcc.opt': 'PCC(LR)',
    'pcc.opt.svm': 'PCC(SVM)',
    'wpacc.opt': 'R-PCC(LR)',
    'wpacc.opt.svm': 'R-PCC(SVM)',
    'mae': 'AE',
    'ae': 'AE',
    'svmkld': 'SVM(KLD)',
    'svmnkld': 'SVM(NKLD)',
    'svmq': 'SVM(Q)',
    'svmae': 'SVM(AE)',
    'svmmae': 'SVM(AE)',
    'svmmrae': 'SVM(RAE)',
    'hdy': 'HDy',
    'sldc': 'SLD',
    'X': 'TSX',
    'T50': 'TS50',
    'ehdymaeds': 'E(HDy)$_\mathrm{DS}$',
    'Average': 'Average',
    'EMdiag': 'EM$_{diag}$', 'EMfull': 'EM$_{full}$', 'EMtied': 'EM$_{tied}$', 'EMspherical': 'EM$_{sph}$',
    'VEMdiag': 'VEM$_{diag}$', 'VEMfull': 'VEM$_{full}$', 'VEMtied': 'VEM$_{tied}$', 'VEMspherical': 'VEM$_{sph}$',
    'epaccmaemae1k': 'E(PACC)$_\mathrm{AE}$',
    'quanet': 'QuaNet'
}


def nicerm(key):
    return '\mathrm{' + nice[key] + '}'


def nicename(method, eval_name=None, side=False):
    m = nice.get(method, method.upper())
    if eval_name is not None:
        m = m.replace('$$', '')
    if side:
        m = '\side{' + m + '}'
    return m


def save_table(path, table):
    print(f'saving results in {path}')
    with open(path, 'wt') as foo:
        foo.write(table)


def experiment_errors(path, dataset, method, run, eval_loss, optim_loss=None):
    if optim_loss is None:
        optim_loss = eval_loss
    path = result_path(path, dataset, method, run, 'm' + optim_loss if not optim_loss.startswith('m') else optim_loss)
    if os.path.exists(path):
        true_prevs, estim_prevs, _, _, _ = pickle.load(open(path, 'rb'))
        err_fn = getattr(qp.error, eval_loss)
        errors = err_fn(true_prevs, estim_prevs)
        return errors
    return None
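The file naming produced by result_path above is what the glob patterns in the plotting script later match against; for example:

print(result_path('./results_uci', 'wdbc', 'wpacc.opt', run=0, optim_loss='mae'))
# ./results_uci/wdbc-wpacc.opt-run0-mae.pkl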
@@ -0,0 +1,174 @@
from sklearn.calibration import CalibratedClassifierCV

import quapy as qp
from sklearn.linear_model import LogisticRegression

from class_weight_model import ClassWeightPCC
# from classification.methods import LowRankLogisticRegression
# from method.experimental import ExpMax, VarExpMax
from common import *
from method.meta import QuaNet
from quantification_stumps_model import QuantificationStumpRegressor
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil


SAMPLE_SIZE = 500

N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1

qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE

__C_range = np.logspace(-3, 3, 7)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}


def quantification_models():
    # yield 'cc', CC(newLR()), lr_params
    # yield 'acc', ACC(newLR()), lr_params
    # yield 'pcc', PCC(newLR()), None
    # yield 'pacc', PACC(newLR()), None
    # yield 'wpacc', ClassWeightPCC(), None
    # yield 'pcc.opt', PCC(newLR()), lr_params
    # yield 'pacc.opt', PACC(newLR()), lr_params
    # yield 'wpacc.opt', ClassWeightPCC(), lr_params
    yield 'ds', QuantificationStumpRegressor(SAMPLE_SIZE, 21, 10), None
    # yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
    # yield 'MAX', MAX(newLR()), lr_params
    # yield 'MS', MS(newLR()), lr_params
    # yield 'MS2', MS2(newLR()), lr_params
    # yield 'sldc', EMQ(calibratedLR()), lr_params
    # yield 'svmmae', SVMAE(), svmperf_params
    # yield 'hdy', HDy(newLR()), lr_params
    # yield 'EMdiag', ExpMax(cov_type='diag'), None
    # yield 'EMfull', ExpMax(cov_type='full'), None
    # yield 'EMtied', ExpMax(cov_type='tied'), None
    # yield 'EMspherical', ExpMax(cov_type='spherical'), None
    # yield 'VEMdiag', VarExpMax(cov_type='diag'), None
    # yield 'VEMfull', VarExpMax(cov_type='full'), None
    # yield 'VEMtied', VarExpMax(cov_type='tied'), None
    # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None


# def quantification_cuda_models():
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     print(f'Running QuaNet in {device}')
#     learner = LowRankLogisticRegression(**newLR().get_params())
#     yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params


def quantification_ensembles():
    param_mod_sel = {
        'sample_size': SAMPLE_SIZE,
        'n_prevpoints': 21,
        'n_repetitions': 5,
        'refit': True,
        'verbose': False
    }
    common = {
        'size': 30,
        'red_size': 15,
        'max_sample_size': None,  # same as training set
        'n_jobs': ENSEMBLE_N_JOBS,
        'param_grid': lr_params,
        'param_mod_sel': param_mod_sel,
        'val_split': 0.4,
        'min_pos': 5
    }

    # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
    # will be skipped (by setting hyperparameters to None)
    hyper_none = None
    yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none


def run(experiment):
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
    if dataset_name == 'imdb':
        return
    data = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=5)
    run = 0

    if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss):
        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
        return

    print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
    # model selection (hyperparameter optimization for a quantification-oriented loss)
    if hyperparams is not None:
        model_selection = qp.model_selection.GridSearchQ(
            model,
            param_grid=hyperparams,
            sample_size=SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=100,
            error=optim_loss,
            refit=True,
            timeout=60 * 60,
            verbose=True
        )
        model_selection.fit(data.training)
        model = model_selection.best_model()
        best_params = model_selection.best_params_
    else:
        model.fit(data.training)
        best_params = {}

    # model evaluation
    true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
        model,
        test=data.test,
        sample_size=SAMPLE_SIZE,
        n_prevpoints=21,  # 21
        n_repetitions=10,  # 100
        n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1,
        verbose=True
    )
    test_true_prevalence = data.test.prevalence()

    evaluate_experiment(true_prevalences, estim_prevalences)
    save_results(args.results, dataset_name, model_name, run, optim_loss,
                 true_prevalences, estim_prevalences,
                 data.training.prevalence(), test_true_prevalence,
                 best_params)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run experiments for Reviews Sentiment Quantification')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
                        help='path to the directory with svmperf')
    parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
                        help='path to the directory where to dump QuaNet checkpoints')
    args = parser.parse_args()

    print(f'Result folder: {args.results}')
    np.random.seed(0)

    qp.environ['SVMPERF_HOME'] = args.svmperfpath

    optim_losses = ['mae']
    datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS

    models = quantification_models()
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)

    # models = quantification_cuda_models()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)

    # models = quantification_ensembles()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)

    shutil.rmtree(args.checkpointdir, ignore_errors=True)
@@ -0,0 +1,70 @@
import quapy as qp
import numpy as np
from os import makedirs
import sys, os
import pickle
import argparse
from common import *
from reviews_experiments import *
from tabular import Table
import itertools

tables_path = './tables_reviews'
MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results

makedirs(tables_path, exist_ok=True)

qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE


METHODS = ['cc', 'acc', 'pcc',
           'pacc',
           'wpacc',
           # 'MAX', 'MS', 'MS2',
           'sldc',
           # 'svmmae',
           # 'hdy',
           # 'ehdymaeds',
           # 'EMdiag', 'EMfull', 'EMtied', 'EMspherical',
           # 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical',
           ]


if __name__ == '__main__':
    results = 'results_reviews'

    datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS
    evaluation_measures = [qp.error.ae]

    run = 0
    for i, eval_func in enumerate(evaluation_measures):
        eval_name = eval_func.__name__

        # Table of evaluation scores for the evaluation measure
        # ----------------------------------------------------
        # fill data table
        table = Table(benchmarks=datasets, methods=METHODS)
        for dataset, method in itertools.product(datasets, METHODS):
            table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name))

        # write the latex table
        nmethods = len(METHODS)
        tabular = """
        \\resizebox{\\textwidth}{!}{%
        \\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline
        & \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline
        """
        rowreplace = {dataset: nicename(dataset) for dataset in datasets}
        colreplace = {method: nicename(method, eval_name, side=True) for method in METHODS}

        tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
        tabular += 'Rank Average & ' + table.getRankTable().latexAverage()
        tabular += """
        \end{tabular}%
        }
        """

        save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular)

    print("[Done]")
@@ -0,0 +1,321 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon


class Table:
    VALID_TESTS = [None, "wilcoxon", "ttest"]

    def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
                 clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--',
                 color=True):
        assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'

        self.benchmarks = np.asarray(benchmarks)
        self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}

        self.methods = np.asarray(methods)
        self.method_index = {col: j for j, col in enumerate(methods)}

        self.map = {}
        # keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
        self._addmap('values', dtype=object)
        self.lower_is_better = lower_is_better
        self.ttest = ttest
        self.prec_mean = prec_mean
        self.clean_zero = clean_zero
        self.show_std = show_std
        self.prec_std = prec_std
        self.add_average = average
        self.missing = missing
        self.missing_str = missing_str
        self.color = color

        self.touch()

    @property
    def nbenchmarks(self):
        return len(self.benchmarks)

    @property
    def nmethods(self):
        return len(self.methods)

    def touch(self):
        self._modif = True

    def update(self):
        if self._modif:
            self.compute()

    def _getfilled(self):
        return np.argwhere(self.map['fill'])

    @property
    def values(self):
        return self.map['values']

    def _indexes(self):
        return itertools.product(range(self.nbenchmarks), range(self.nmethods))

    def _addmap(self, map, dtype, func=None):
        self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
        if func is None:
            return
        m = self.map[map]
        f = func
        indexes = self._indexes() if map == 'fill' else self._getfilled()
        for i, j in indexes:
            m[i, j] = f(self.values[i, j])

    def _addrank(self):
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
            if not self.lower_is_better:
                ranked_cols_idx = ranked_cols_idx[::-1]
            self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)

    def _addcolor(self):
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            if filled_cols_idx.size == 0:
                continue
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            minval = min(col_means)
            maxval = max(col_means)
            for col_idx in filled_cols_idx:
                val = self.map['mean'][i, col_idx]
                norm = (maxval - minval)
                if norm > 0:
                    normval = (val - minval) / norm
                else:
                    normval = 0.5
                if self.lower_is_better:
                    normval = 1 - normval
                self.map['color'][i, col_idx] = color_red2green_01(normval)

    def _run_ttest(self, row, col1, col2):
        mean1 = self.map['mean'][row, col1]
        std1 = self.map['std'][row, col1]
        nobs1 = self.map['nobs'][row, col1]
        mean2 = self.map['mean'][row, col2]
        std2 = self.map['std'][row, col2]
        nobs2 = self.map['nobs'][row, col2]
        _, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
        return p_val

    def _run_wilcoxon(self, row, col1, col2):
        values1 = self.map['values'][row, col1]
        values2 = self.map['values'][row, col2]
        _, p_val = wilcoxon(values1, values2)
        return p_val

    def _add_statistical_test(self):
        if self.ttest is None:
            return
        self.some_similar = [False] * self.nmethods
        for i in range(self.nbenchmarks):
            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
            if len(filled_cols_idx) <= 1:
                continue
            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
            best_pos = filled_cols_idx[np.argmin(col_means)]

            for j in filled_cols_idx:
                if j == best_pos:
                    continue
                if self.ttest == 'ttest':
                    p_val = self._run_ttest(i, best_pos, j)
                else:
                    p_val = self._run_wilcoxon(i, best_pos, j)

                pval_outcome = pval_interpretation(p_val)
                self.map['ttest'][i, j] = pval_outcome
                if pval_outcome != 'Diff':
                    self.some_similar[j] = True

    def compute(self):
        self._addmap('fill', dtype=bool, func=lambda x: x is not None)
        self._addmap('mean', dtype=float, func=np.mean)
        self._addmap('std', dtype=float, func=np.std)
        self._addmap('nobs', dtype=float, func=len)
        self._addmap('rank', dtype=int, func=None)
        self._addmap('color', dtype=object, func=None)
        self._addmap('ttest', dtype=object, func=None)
        self._addmap('latex', dtype=object, func=None)
        self._addrank()
        self._addcolor()
        self._add_statistical_test()
        if self.add_average:
            self._addave()
        self._modif = False

    def _is_column_full(self, col):
        return all(self.map['fill'][:, self.method_index[col]])

    def _addave(self):
        ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, ttest=self.ttest, average=False,
                    missing=self.missing, missing_str=self.missing_str)
        for col in self.methods:
            values = None
            if self._is_column_full(col):
                if self.ttest == 'ttest':
                    values = np.asarray(self.map['mean'][:, self.method_index[col]])
                else:  # wilcoxon
                    values = np.concatenate(self.values[:, self.method_index[col]])
            ave.add('ave', col, values)
        self.average = ave

    def add(self, benchmark, method, values):
        if values is not None:
            values = np.asarray(values)
            if values.ndim == 0:
                values = values.flatten()
        rid, cid = self._coordinates(benchmark, method)
        if self.map['values'][rid, cid] is None:
            self.map['values'][rid, cid] = values
        elif values is not None:
            self.map['values'][rid, cid] = np.concatenate([self.map['values'][rid, cid], values])
        self.touch()

    def get(self, benchmark, method, attr='mean'):
        self.update()
        assert attr in self.map, f'unknown attribute {attr}'
        rid, cid = self._coordinates(benchmark, method)
        if self.map['fill'][rid, cid]:
            v = self.map[attr][rid, cid]
            if v is None or (isinstance(v, float) and np.isnan(v)):
                return self.missing
            return v
        else:
            return self.missing

    def _coordinates(self, benchmark, method):
        assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
        assert method in self.method_index, f'method {method} out of range'
        rid = self.benchmark_index[benchmark]
        cid = self.method_index[method]
        return rid, cid

    def get_average(self, method, attr='mean'):
        self.update()
        if self.add_average:
            return self.average.get('ave', method, attr=attr)
        return None

    def get_color(self, benchmark, method):
        color = self.get(benchmark, method, attr='color')
        if color is None:
            return ''
        return color

    def latex(self, benchmark, method):
        self.update()
        i, j = self._coordinates(benchmark, method)
        if self.map['fill'][i, j] == False:
            return self.missing_str

        mean = self.map['mean'][i, j]
        l = f" {mean:.{self.prec_mean}f}"
        if self.clean_zero:
            l = l.replace(' 0.', '.')

        isbest = self.map['rank'][i, j] == 1
        if isbest:
            l = "\\textbf{" + l.strip() + "}"

        stat = ''
        if self.ttest is not None and self.some_similar[j]:
            test_label = self.map['ttest'][i, j]
            if test_label == 'Sim':
                stat = '^{\dag\phantom{\dag}}'
            elif test_label == 'Same':
                stat = '^{\ddag}'
            elif isbest or test_label == 'Diff':
                stat = '^{\phantom{\ddag}}'

        std = ''
        if self.show_std:
            std = self.map['std'][i, j]
            std = f" {std:.{self.prec_std}f}"
            if self.clean_zero:
                std = std.replace(' 0.', '.')
            std = f" \pm {std:{self.prec_std}}"

        if stat != '' or std != '':
            l = f'{l}${stat}{std}$'

        if self.color:
            l += ' ' + self.map['color'][i, j]

        return l

    def latexTabular(self, benchmark_replace={}, method_replace={}, average=True):
        tab = ' & '
        tab += ' & '.join([method_replace.get(col, col) for col in self.methods])
        tab += ' \\\\\hline\n'
        for row in self.benchmarks:
            rowname = benchmark_replace.get(row, row)
            tab += rowname + ' & '
            tab += self.latexRow(row)

        if average:
            tab += '\hline\n'
            tab += 'Average & '
            tab += self.latexAverage()
        return tab

    def latexRow(self, benchmark, endl='\\\\\hline\n'):
        s = [self.latex(benchmark, col) for col in self.methods]
        s = ' & '.join(s)
        s += ' ' + endl
        return s

    def latexAverage(self, endl='\\\\\hline\n'):
        if self.add_average:
            return self.average.latexRow('ave', endl=endl)

    def getRankTable(self):
        t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
        for rid, cid in self._getfilled():
            row = self.benchmarks[rid]
            col = self.methods[cid]
            t.add(row, col, self.get(row, col, 'rank'))
        t.compute()
        return t

    def dropMethods(self, methods):
        drop_index = [self.method_index[m] for m in methods]
        new_methods = np.delete(self.methods, drop_index)
        new_index = {col: j for j, col in enumerate(new_methods)}

        self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
        self.methods = new_methods
        self.method_index = new_index
        self.touch()


def pval_interpretation(p_val):
    if 0.005 >= p_val:
        return 'Diff'
    elif 0.05 >= p_val > 0.005:
        return 'Sim'
    elif p_val > 0.05:
        return 'Same'


def color_red2green_01(val, maxtone=50):
    if np.isnan(val): return None
    assert 0 <= val <= 1, f'val {val} out of range [0,1]'

    # rescale to [-1,1]
    val = val * 2 - 1
    if val < 0:
        color = 'red'
        tone = maxtone * (-val)
    else:
        color = 'green'
        tone = maxtone * val
    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
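For reference, a short sketch of how the experiment scripts above drive this class; the error scores below are random placeholders, not real results:

import numpy as np

table = Table(benchmarks=['wdbc', 'spambase'], methods=['pacc.opt', 'wpacc.opt'])
rng = np.random.default_rng(0)
for dataset in ['wdbc', 'spambase']:
    table.add(dataset, 'pacc.opt', rng.uniform(0., .20, size=100))   # 100 per-sample AE scores
    table.add(dataset, 'wpacc.opt', rng.uniform(0., .15, size=100))

# renders means per cell, best result in boldface, dag/ddag significance marks, cell colors
print(table.latexTabular())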
@@ -0,0 +1,173 @@
from sklearn.svm import LinearSVC

from class_weight_model import ClassWeightPCC
# from classification.methods import LowRankLogisticRegression
# from method.experimental import ExpMax, VarExpMax
from common import *
from method.meta import QuaNet
from quantification_stumps_model import QuantificationStumpRegressor
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil


SAMPLE_SIZE = 100

N_FOLDS = 5
N_REPEATS = 1

N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1

qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE

__C_range = np.logspace(-3, 3, 7)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}


def quantification_models():
    # yield 'cc', CC(newLR()), lr_params
    # yield 'acc', ACC(newLR()), lr_params
    yield 'pcc.opt', PCC(newLR()), lr_params
    yield 'pacc.opt', PACC(newLR()), lr_params
    yield 'wpacc.opt', ClassWeightPCC(), lr_params
    yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range}
    # yield 'pcc.opt.svm', PCC(LinearSVC()), lr_params
    # yield 'pacc.opt.svm', PACC(LinearSVC()), lr_params
    # yield 'wpacc.opt.svm', ClassWeightPCC(LinearSVC), lr_params
    # yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params  # this cannot work in its current version (see notes in the class_weight_model.py file)
    # yield 'MAX', MAX(newLR()), lr_params
    # yield 'MS', MS(newLR()), lr_params
    # yield 'MS2', MS2(newLR()), lr_params
    yield 'sldc', EMQ(calibratedLR()), lr_params
    # yield 'svmmae', SVMAE(), svmperf_params
    # yield 'hdy', HDy(newLR()), lr_params
    # yield 'EMdiag', ExpMax(cov_type='diag'), None
    # yield 'EMfull', ExpMax(cov_type='full'), None
    # yield 'EMtied', ExpMax(cov_type='tied'), None
    # yield 'EMspherical', ExpMax(cov_type='spherical'), None
    # yield 'VEMdiag', VarExpMax(cov_type='diag'), None
    # yield 'VEMfull', VarExpMax(cov_type='full'), None
    # yield 'VEMtied', VarExpMax(cov_type='tied'), None
    # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None


# def quantification_cuda_models():
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     print(f'Running QuaNet in {device}')
#     learner = LowRankLogisticRegression(**newLR().get_params())
#     yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params


# def quantification_ensembles():
#     param_mod_sel = {
#         'sample_size': SAMPLE_SIZE,
#         'n_prevpoints': 21,
#         'n_repetitions': 5,
#         'refit': True,
#         'verbose': False
#     }
#     common = {
#         'size': 30,
#         'red_size': 15,
#         'max_sample_size': None,  # same as training set
#         'n_jobs': ENSEMBLE_N_JOBS,
#         'param_grid': lr_params,
#         'param_mod_sel': param_mod_sel,
#         'val_split': 0.4,
#         'min_pos': 5
#     }
#
#     # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
#     # will be skipped (by setting hyperparameters to None)
#     hyper_none = None
#     yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none


def run(experiment):
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
    if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return

    collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
    for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=N_FOLDS, nrepeats=N_REPEATS)):
        if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss):
            print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
            continue

        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
        # model selection (hyperparameter optimization for a quantification-oriented loss)
        if hyperparams is not None:
            model_selection = qp.model_selection.GridSearchQ(
                model,
                param_grid=hyperparams,
                sample_size=SAMPLE_SIZE,
                n_prevpoints=21,
                n_repetitions=25,
                error=optim_loss,
                refit=True,
                timeout=60 * 60,
                verbose=True
            )
            model_selection.fit(data.training)
            model = model_selection.best_model()
            best_params = model_selection.best_params_
        else:
            model.fit(data.training)
            best_params = {}

        # model evaluation
        true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
            model,
            test=data.test,
            sample_size=SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=100,
            n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
        )
        test_true_prevalence = data.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        save_results(args.results, dataset_name, model_name, run, optim_loss,
                     true_prevalences, estim_prevalences,
                     data.training.prevalence(), test_true_prevalence,
                     best_params)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run experiments for UCI ML Quantification')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
                        help='path to the directory with svmperf')
    parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
                        help='path to the directory where to dump QuaNet checkpoints')
    args = parser.parse_args()

    print(f'Result folder: {args.results}')
    np.random.seed(0)

    qp.environ['SVMPERF_HOME'] = args.svmperfpath

    optim_losses = ['mae']
    datasets = qp.datasets.UCI_DATASETS

    models = quantification_models()
    # for runargs in itertools.product(optim_losses, datasets, models):
    #     run(runargs)
    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS)

    # models = quantification_cuda_models()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)

    # models = quantification_ensembles()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)

    shutil.rmtree(args.checkpointdir, ignore_errors=True)
@@ -0,0 +1,100 @@
import quapy as qp
import os
import pathlib
import pickle
from glob import glob
import sys

from plot_driftbox import brokenbar_supremacy_by_drift
from uci_experiments import *
from uci_tables import METHODS
from os.path import join


qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
plotext = 'png'

resultdir = './results_uci'
plotdir = './plots_uci'
os.makedirs(plotdir, exist_ok=True)

N_RUNS = N_FOLDS * N_REPEATS


def gather_results(methods, error_name, resultdir):
    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
    for method in methods:
        for run in range(N_RUNS):
            for experiment in glob(f'{resultdir}/*-{method}-run{run}-m{error_name}.pkl'):
                true_prevalences, estim_prevalences, tr_prev, te_prev, best_params = pickle.load(open(experiment, 'rb'))
                method_names.append(nicename(method))
                true_prevs.append(true_prevalences)
                estim_prevs.append(estim_prevalences)
                tr_prevs.append(tr_prev)
    return method_names, true_prevs, estim_prevs, tr_prevs


def plot_error_by_drift(methods, error_name, logscale=False, path=None):
    print('plotting error by drift')
    if path is not None:
        path = join(path, f'error_by_drift_{error_name}.{plotext}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
    qp.plot.error_by_drift(
        method_names,
        true_prevs,
        estim_prevs,
        tr_prevs,
        n_bins=20,
        error_name=error_name,
        show_std=True,
        logscale=logscale,
        title='Quantification error as a function of distribution shift',
        savepath=path
    )


def diagonal_plot(methods, error_name, path=None):
    print('plotting diagonal plots')
    if path is not None:
        path = join(path, f'diag_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
    qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, show_std=True, savepath=f'{path}_pos.{plotext}')


def binary_bias_global(methods, error_name, path=None):
    print('plotting bias global')
    if path is not None:
        path = join(path, f'globalbias_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
    qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', savepath=f'{path}_pos.{plotext}')


def binary_bias_bins(methods, error_name, path=None):
    print('plotting bias local')
    if path is not None:
        path = join(path, f'localbias_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
    qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}')


def brokenbar_supr(methods, error_name, path=None):
    print('plotting brokenbar_supr')
    if path is not None:
        path = join(path, f'broken_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name, resultdir)
    brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, binning='isometric',
                                 x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005,
                                 savepath=path)


if __name__ == '__main__':
    # plot_error_by_drift(METHODS, error_name='ae', path=plotdir)

    # diagonal_plot(METHODS, error_name='ae', path=plotdir)

    # binary_bias_global(METHODS, error_name='ae', path=plotdir)

    # binary_bias_bins(METHODS, error_name='ae', path=plotdir)

    brokenbar_supr(METHODS, error_name='ae', path=plotdir)
@@ -0,0 +1,81 @@
import quapy as qp
import numpy as np
from os import makedirs
import sys, os
import pickle
import argparse
from common import *
from uci_experiments import result_path
from tabular import Table
from uci_experiments import *
import itertools

tables_path = './tables_uci'
MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results

makedirs(tables_path, exist_ok=True)

qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE


METHODS = [  # 'cc', 'acc',
    # 'pcc',
    # 'pacc',
    # 'wpacc',
    'pcc.opt',
    'pacc.opt',
    'wpacc.opt',
    'ds.opt',
    # 'pcc.opt.svm',
    # 'pacc.opt.svm',
    # 'wpacc.opt.svm',
    # 'wpacc.opt2',
    # 'MAX', 'MS', 'MS2',
    'sldc',
    # 'svmmae',
    # 'hdy',
    # 'ehdymaeds',
    # 'EMdiag', 'EMfull', 'EMtied', 'EMspherical',
    # 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical',
]


if __name__ == '__main__':
    results = 'results_uci'

    datasets = qp.datasets.UCI_DATASETS
    datasets.remove('acute.a')
    datasets.remove('acute.b')
    datasets.remove('iris.1')
    evaluation_measures = [qp.error.ae, qp.error.rae, qp.error.kld]

    for i, eval_func in enumerate(evaluation_measures):
        eval_name = eval_func.__name__

        # Table of evaluation scores for the evaluation measure
        # ----------------------------------------------------
        # fill data table
        table = Table(benchmarks=datasets, methods=METHODS)
        for dataset, method, run in itertools.product(datasets, METHODS, range(N_FOLDS * N_REPEATS)):
            table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name, optim_loss='ae'))

        # write the latex table
        nmethods = len(METHODS)
        tabular = """
        \\resizebox{\\textwidth}{!}{%
        \\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline
        & \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline
        """
        rowreplace = {dataset: nicename(dataset) for dataset in datasets}
        colreplace = {method: nicename(method, eval_name, side=True) for method in METHODS}

        tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
        tabular += 'Rank Average & ' + table.getRankTable().latexAverage()
        tabular += """
        \end{tabular}%
        }
        """

        save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular)

    print("[Done]")
@@ -333,6 +333,7 @@ class Dataset:
         yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})')
 
 
+
 def isbinary(data):
     if isinstance(data, Dataset) or isinstance(data, LabelledCollection):
         return data.binary
@@ -228,10 +228,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, e
         if show_std:
             ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)
 
-    # xs = bins[:-1]
-    # ys = inds_histogram_global
-    # print(xs.shape, ys.shape)
-    # ax.errorbar(xs, ys, label='density')
+    xs = bins[:-1]
+    ys = inds_histogram_global
+    print(xs.shape, ys.shape)
+    ax.errorbar(xs, ys, label='density')
 
     ax.set(xlabel=f'Distribution shift between training set and test sample',
            ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
@@ -41,9 +41,9 @@ def parallel(func, args, n_jobs):
     )
     that takes the quapy.environ variable as input silently
     """
-    def func_dec(environ, *args):
+    def func_dec(environ, *args_i):
         qp.environ = environ
-        return func(*args)
+        return func(*args_i)
     return Parallel(n_jobs=n_jobs)(
         delayed(func_dec)(qp.environ, args_i) for args_i in args
     )
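The rename in this hunk is mainly defensive: inside func_dec, the old *args shadowed the args parameter of the enclosing parallel, which happens to be harmless here but is a classic source of bugs when the inner function later needs to reach the outer name. A tiny illustration of the pitfall, with hypothetical names:

def outer(items):
    def inner(*items):        # the inner *items shadows outer's parameter
        return len(items)     # this is inner's own argument tuple, not the outer list
    return [inner(x) for x in items]

print(outer(['a', 'b', 'c']))  # [1, 1, 1]: each call sees a 1-tuple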