diff --git a/NewMethods/class_weight_model.py b/NewMethods/class_weight_model.py index 4b5410f..3f8c55c 100644 --- a/NewMethods/class_weight_model.py +++ b/NewMethods/class_weight_model.py @@ -1,5 +1,6 @@ -from sklearn.linear_model import LogisticRegression +from sklearn.linear_model import LogisticRegression, LogisticRegressionCV import numpy as np +from sklearn.model_selection import GridSearchCV import quapy as qp from data import LabelledCollection @@ -7,21 +8,59 @@ from method.base import BaseQuantifier from quapy.method.aggregative import AggregativeQuantifier, AggregativeProbabilisticQuantifier, CC, ACC, PCC, PACC +""" +Possible extensions: + - add CC and ClassWeightCC + - understand how to optimize hyper-parameters for the final PCC quantifier. It is not trivial, since once + class_weight has been set, the C parameter plays a secondary role. The reason is that I seriously doubt that + the cross-validation takes into account the fact that one class might be more important than the other, + and so the best C parameter for quantifying, conditioned on this class prevalence, has nothing to do with the + best C for classifying the current data... unless I define an evaluation metric that weights each class accordingly, + but this is very tricky (it is like implementing the "adjustment" in the evaluation metric...) + - it might be worth investigating more deeply the role of CV, and of val_split, in ACC/PACC. Is it something that + consistently delivers improved accuracies (for quantification), or is there a tricky trade-off between data + usage, the instability due to adjusting for slightly different quantifiers, and so on? + - argue that this method is only interesting in cases in which we have little data (the adjustment discards data), + and not when the classifier is a costly one (it requires training at test time). Argue that the computational + burden can be transferred to the training stage, by training many LRs for different class_weight ratios, and + then using, at test time, the one closest to the guessed prevalence. + - better investigate the "iterative" nature of the method. + - better investigate the implications of using other learners, e.g., using EMQ as the prompt, or using EMQ in the second + stage (at test time). +""" class ClassWeightPCC(BaseQuantifier): - def __init__(self): - self.learner = None + def __init__(self, **pcc_param_grid): + self.learner = PACC(LogisticRegression()) + if 'class_weight' in pcc_param_grid: + raise ValueError('parameter "class_weight" cannot be included in "pcc_param_grid"') + self.pcc_param_grid = dict(pcc_param_grid) + self.deployed = False def fit(self, data: LabelledCollection, fit_learner=True): self.train = data - self.prompt = PACC(LogisticRegression()).fit(self.train) + self.learner.fit(self.train) return self + def deploy(self, deployed=True): + self.deployed = deployed + def quantify(self, instances): - guessed_prevalence = self.prompt.quantify(instances) + guessed_prevalence = self.learner.quantify(instances) class_weight = self._get_class_weight(guessed_prevalence) - return PCC(LogisticRegression(class_weight=class_weight)).fit(self.train).quantify(instances) + if self.pcc_param_grid and self.deployed: + """If the param grid has been specified, then use it to find good hyper-parameters for the classifier.
+ In this case, we know (an approximation of) the target prevalence, so we might simply want to optimize + for classification (and not for quantification)""" + # pcc = PCC(GridSearchCV(LogisticRegression(class_weight=class_weight), param_grid=self.pcc_param_grid, n_jobs=-1)) + pcc = PCC(LogisticRegressionCV(Cs=self.pcc_param_grid['C'], class_weight=class_weight, n_jobs=-1, cv=3)) + else: + """If the param grid has not been specified, we take the best parameters found for the base quantifier""" + base_parameters = dict(self.learner.get_params()) + base_parameters['class_weight'] = class_weight # override the class_weight parameter + pcc = PCC(LogisticRegression(**base_parameters)) + return pcc.fit(self.train).quantify(instances) def _get_class_weight(self, prevalence): # class_weight = compute_class_weight('balanced', classes=[0, 1], y=mock_y(prevalence)) @@ -35,10 +74,10 @@ class ClassWeightPCC(BaseQuantifier): return {0:weights[0], 1:weights[1]} def set_params(self, **parameters): - pass + self.learner.set_params(**parameters) def get_params(self, deep=True): - return self.prompt.get_params() + return self.learner.get_params() @property def classes_(self): diff --git a/NewMethods/common.py b/NewMethods/common.py new file mode 100644 index 0000000..56aaffe --- /dev/null +++ b/NewMethods/common.py @@ -0,0 +1,89 @@ +import pickle +import os +from sklearn.calibration import CalibratedClassifierCV +from sklearn.linear_model import LogisticRegression + +import quapy as qp + + + +def newLR(): + return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1) + + +def calibratedLR(): + return CalibratedClassifierCV(LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)) + + +def save_results(result_dir, dataset_name, model_name, run, optim_loss, *results): + rpath = result_path(result_dir, dataset_name, model_name, run, optim_loss) + qp.util.create_parent_dir(rpath) + with open(rpath, 'wb') as foo: + pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL) + + +def evaluate_experiment(true_prevalences, estim_prevalences): + print('\nEvaluation Metrics:\n' + '=' * 22) + for eval_measure in [qp.error.mae, qp.error.mrae]: + err = eval_measure(true_prevalences, estim_prevalences) + print(f'\t{eval_measure.__name__}={err:.4f}') + print() + + +def result_path(path, dataset_name, model_name, run, optim_loss): + return os.path.join(path, f'{dataset_name}-{model_name}-run{run}-{optim_loss}.pkl') + + +def is_already_computed(result_dir, dataset_name, model_name, run, optim_loss): + return os.path.exists(result_path(result_dir, dataset_name, model_name, run, optim_loss)) + + +nice = { + 'mae':'AE', + 'ae':'AE', + 'svmkld': 'SVM(KLD)', + 'svmnkld': 'SVM(NKLD)', + 'svmq': 'SVM(Q)', + 'svmae': 'SVM(AE)', + 'svmmae': 'SVM(AE)', + 'svmmrae': 'SVM(RAE)', + 'hdy': 'HDy', + 'sldc': 'SLD', + 'X': 'TSX', + 'T50': 'TS50', + 'ehdymaeds': 'E(HDy)$_\mathrm{DS}$', + 'Average': 'Average', + 'EMdiag':'EM$_{diag}$', 'EMfull':'EM$_{full}$', 'EMtied':'EM$_{tied}$', 'EMspherical':'EM$_{sph}$', + 'VEMdiag':'VEM$_{diag}$', 'VEMfull':'VEM$_{full}$', 'VEMtied':'VEM$_{tied}$', 'VEMspherical':'VEM$_{sph}$', +} + + +def nicerm(key): + return '\mathrm{'+nice[key]+'}' + + +def nicename(method, eval_name=None, side=False): + m = nice.get(method, method.upper()) + if eval_name is not None: + m = m.replace('$$','') + if side: + m = '\side{'+m+'}' + return m + + +def save_table(path, table): + print(f'saving results in {path}') + with open(path, 'wt') as foo: + foo.write(table) + + +def experiment_errors(path, dataset, method, 
run, eval_loss, optim_loss=None): + if optim_loss is None: + optim_loss = eval_loss + path = result_path(path, dataset, method, run, 'm' + optim_loss if not optim_loss.startswith('m') else optim_loss) + if os.path.exists(path): + true_prevs, estim_prevs, _, _, _ = pickle.load(open(path, 'rb')) + err_fn = getattr(qp.error, eval_loss) + errors = err_fn(true_prevs, estim_prevs) + return errors + return None \ No newline at end of file diff --git a/NewMethods/tabular.py b/NewMethods/tabular.py new file mode 100644 index 0000000..ea8792c --- /dev/null +++ b/NewMethods/tabular.py @@ -0,0 +1,321 @@ +import numpy as np +import itertools +from scipy.stats import ttest_ind_from_stats, wilcoxon + + +class Table: + VALID_TESTS = [None, "wilcoxon", "ttest"] + + def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3, + clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', + color=True): + assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}' + + self.benchmarks = np.asarray(benchmarks) + self.benchmark_index = {row: i for i, row in enumerate(benchmarks)} + + self.methods = np.asarray(methods) + self.method_index = {col: j for j, col in enumerate(methods)} + + self.map = {} + # keyed (#rows,#cols)-ndarrays holding computations from self.map['values'] + self._addmap('values', dtype=object) + self.lower_is_better = lower_is_better + self.ttest = ttest + self.prec_mean = prec_mean + self.clean_zero = clean_zero + self.show_std = show_std + self.prec_std = prec_std + self.add_average = average + self.missing = missing + self.missing_str = missing_str + self.color = color + + self.touch() + + @property + def nbenchmarks(self): + return len(self.benchmarks) + + @property + def nmethods(self): + return len(self.methods) + + def touch(self): + self._modif = True + + def update(self): + if self._modif: + self.compute() + + def _getfilled(self): + return np.argwhere(self.map['fill']) + + @property + def values(self): + return self.map['values'] + + def _indexes(self): + return itertools.product(range(self.nbenchmarks), range(self.nmethods)) + + def _addmap(self, map, dtype, func=None): + self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype) + if func is None: + return + m = self.map[map] + f = func + indexes = self._indexes() if map == 'fill' else self._getfilled() + for i, j in indexes: + m[i, j] = f(self.values[i, j]) + + def _addrank(self): + for i in range(self.nbenchmarks): + filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten() + col_means = [self.map['mean'][i, j] for j in filled_cols_idx] + ranked_cols_idx = filled_cols_idx[np.argsort(col_means)] + if not self.lower_is_better: + ranked_cols_idx = ranked_cols_idx[::-1] + self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1) + + def _addcolor(self): + for i in range(self.nbenchmarks): + filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten() + if filled_cols_idx.size == 0: + continue + col_means = [self.map['mean'][i, j] for j in filled_cols_idx] + minval = min(col_means) + maxval = max(col_means) + for col_idx in filled_cols_idx: + val = self.map['mean'][i, col_idx] + norm = (maxval - minval) + if norm > 0: + normval = (val - minval) / norm + else: + normval = 0.5 + if self.lower_is_better: + normval = 1 - normval + self.map['color'][i, col_idx] = color_red2green_01(normval) + + def _run_ttest(self, row, col1, col2): + mean1 = self.map['mean'][row, col1] + std1 = self.map['std'][row, col1] + nobs1 = 
self.map['nobs'][row, col1] + mean2 = self.map['mean'][row, col2] + std2 = self.map['std'][row, col2] + nobs2 = self.map['nobs'][row, col2] + _, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2) + return p_val + + def _run_wilcoxon(self, row, col1, col2): + values1 = self.map['values'][row, col1] + values2 = self.map['values'][row, col2] + _, p_val = wilcoxon(values1, values2) + return p_val + + def _add_statistical_test(self): + if self.ttest is None: + return + self.some_similar = [False] * self.nmethods + for i in range(self.nbenchmarks): + filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten() + if len(filled_cols_idx) <= 1: + continue + col_means = [self.map['mean'][i, j] for j in filled_cols_idx] + best_pos = filled_cols_idx[np.argmin(col_means)] + + for j in filled_cols_idx: + if j == best_pos: + continue + if self.ttest == 'ttest': + p_val = self._run_ttest(i, best_pos, j) + else: + p_val = self._run_wilcoxon(i, best_pos, j) + + pval_outcome = pval_interpretation(p_val) + self.map['ttest'][i, j] = pval_outcome + if pval_outcome != 'Diff': + self.some_similar[j] = True + + def compute(self): + self._addmap('fill', dtype=bool, func=lambda x: x is not None) + self._addmap('mean', dtype=float, func=np.mean) + self._addmap('std', dtype=float, func=np.std) + self._addmap('nobs', dtype=float, func=len) + self._addmap('rank', dtype=int, func=None) + self._addmap('color', dtype=object, func=None) + self._addmap('ttest', dtype=object, func=None) + self._addmap('latex', dtype=object, func=None) + self._addrank() + self._addcolor() + self._add_statistical_test() + if self.add_average: + self._addave() + self._modif = False + + def _is_column_full(self, col): + return all(self.map['fill'][:, self.method_index[col]]) + + def _addave(self): + ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, ttest=self.ttest, average=False, + missing=self.missing, missing_str=self.missing_str) + for col in self.methods: + values = None + if self._is_column_full(col): + if self.ttest == 'ttest': + values = np.asarray(self.map['mean'][:, self.method_index[col]]) + else: # wilcoxon + values = np.concatenate(self.values[:, self.method_index[col]]) + ave.add('ave', col, values) + self.average = ave + + def add(self, benchmark, method, values): + if values is not None: + values = np.asarray(values) + if values.ndim == 0: + values = values.flatten() + rid, cid = self._coordinates(benchmark, method) + if self.map['values'][rid, cid] is None: + self.map['values'][rid, cid] = values + elif values is not None: + self.map['values'][rid, cid] = np.concatenate([self.map['values'][rid, cid], values]) + self.touch() + + def get(self, benchmark, method, attr='mean'): + self.update() + assert attr in self.map, f'unknwon attribute {attr}' + rid, cid = self._coordinates(benchmark, method) + if self.map['fill'][rid, cid]: + v = self.map[attr][rid, cid] + if v is None or (isinstance(v, float) and np.isnan(v)): + return self.missing + return v + else: + return self.missing + + def _coordinates(self, benchmark, method): + assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range' + assert method in self.method_index, f'method {method} out of range' + rid = self.benchmark_index[benchmark] + cid = self.method_index[method] + return rid, cid + + def get_average(self, method, attr='mean'): + self.update() + if self.add_average: + return self.average.get('ave', method, attr=attr) + return None + + def get_color(self, benchmark, method): + color = self.get(benchmark, method, 
attr='color') + if color is None: + return '' + return color + + def latex(self, benchmark, method): + self.update() + i, j = self._coordinates(benchmark, method) + if self.map['fill'][i, j] == False: + return self.missing_str + + mean = self.map['mean'][i, j] + l = f" {mean:.{self.prec_mean}f}" + if self.clean_zero: + l = l.replace(' 0.', '.') + + isbest = self.map['rank'][i, j] == 1 + if isbest: + l = "\\textbf{" + l.strip() + "}" + + stat = '' + if self.ttest is not None and self.some_similar[j]: + test_label = self.map['ttest'][i, j] + if test_label == 'Sim': + stat = '^{\dag\phantom{\dag}}' + elif test_label == 'Same': + stat = '^{\ddag}' + elif isbest or test_label == 'Diff': + stat = '^{\phantom{\ddag}}' + + std = '' + if self.show_std: + std = self.map['std'][i, j] + std = f" {std:.{self.prec_std}f}" + if self.clean_zero: + std = std.replace(' 0.', '.') + std = f" \pm {std:{self.prec_std}}" + + if stat != '' or std != '': + l = f'{l}${stat}{std}$' + + if self.color: + l += ' ' + self.map['color'][i, j] + + return l + + def latexTabular(self, benchmark_replace={}, method_replace={}, average=True): + tab = ' & ' + tab += ' & '.join([method_replace.get(col, col) for col in self.methods]) + tab += ' \\\\\hline\n' + for row in self.benchmarks: + rowname = benchmark_replace.get(row, row) + tab += rowname + ' & ' + tab += self.latexRow(row) + + if average: + tab += '\hline\n' + tab += 'Average & ' + tab += self.latexAverage() + return tab + + def latexRow(self, benchmark, endl='\\\\\hline\n'): + s = [self.latex(benchmark, col) for col in self.methods] + s = ' & '.join(s) + s += ' ' + endl + return s + + def latexAverage(self, endl='\\\\\hline\n'): + if self.add_average: + return self.average.latexRow('ave', endl=endl) + + def getRankTable(self): + t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True) + for rid, cid in self._getfilled(): + row = self.benchmarks[rid] + col = self.methods[cid] + t.add(row, col, self.get(row, col, 'rank')) + t.compute() + return t + + def dropMethods(self, methods): + drop_index = [self.method_index[m] for m in methods] + new_methods = np.delete(self.methods, drop_index) + new_index = {col: j for j, col in enumerate(new_methods)} + + self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)] + self.methods = new_methods + self.method_index = new_index + self.touch() + + +def pval_interpretation(p_val): + if 0.005 >= p_val: + return 'Diff' + elif 0.05 >= p_val > 0.005: + return 'Sim' + elif p_val > 0.05: + return 'Same' + + +def color_red2green_01(val, maxtone=50): + if np.isnan(val): return None + assert 0 <= val <= 1, f'val {val} out of range [0,1]' + + # rescale to [-1,1] + val = val * 2 - 1 + if val < 0: + color = 'red' + tone = maxtone * (-val) + else: + color = 'green' + tone = maxtone * val + return '\cellcolor{' + color + f'!{int(tone)}' + '}' \ No newline at end of file diff --git a/NewMethods/tc_experiments.py b/NewMethods/tc_experiments.py new file mode 100644 index 0000000..119f1e6 --- /dev/null +++ b/NewMethods/tc_experiments.py @@ -0,0 +1,170 @@ +from sklearn.calibration import CalibratedClassifierCV + +import quapy as qp +from sklearn.linear_model import LogisticRegression + +from class_weight_model import ClassWeightPCC +# from classification.methods import LowRankLogisticRegression +# from method.experimental import ExpMax, VarExpMax +from common import * +from method.meta import QuaNet +from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, 
SVMAE, HDy +from quapy.method.meta import EHDy +import numpy as np +import os +import pickle +import itertools +import argparse +import torch +import shutil + + +SAMPLE_SIZE = 500 + +N_JOBS = -1 +CUDA_N_JOBS = 2 +ENSEMBLE_N_JOBS = -1 + +qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE + +__C_range = np.logspace(-3, 3, 7) +lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']} +svmperf_params = {'C': __C_range} + + +def quantification_models(): + # yield 'cc', CC(newLR()), lr_params + # yield 'acc', ACC(newLR()), lr_params + yield 'pcc', PCC(newLR()), None + yield 'pacc', PACC(newLR()), None + yield 'wpacc', ClassWeightPCC(), None + yield 'pcc.opt', PCC(newLR()), lr_params + yield 'pacc.opt', PACC(newLR()), lr_params + yield 'wpacc.opt', ClassWeightPCC(), lr_params + # yield 'MAX', MAX(newLR()), lr_params + # yield 'MS', MS(newLR()), lr_params + # yield 'MS2', MS2(newLR()), lr_params + # yield 'sldc', EMQ(calibratedLR()), lr_params + # yield 'svmmae', SVMAE(), svmperf_params + # yield 'hdy', HDy(newLR()), lr_params + # yield 'EMdiag', ExpMax(cov_type='diag'), None + # yield 'EMfull', ExpMax(cov_type='full'), None + # yield 'EMtied', ExpMax(cov_type='tied'), None + # yield 'EMspherical', ExpMax(cov_type='spherical'), None + # yield 'VEMdiag', VarExpMax(cov_type='diag'), None + # yield 'VEMfull', VarExpMax(cov_type='full'), None + # yield 'VEMtied', VarExpMax(cov_type='tied'), None + # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None + + +# def quantification_cuda_models(): +# device = 'cuda' if torch.cuda.is_available() else 'cpu' +# print(f'Running QuaNet in {device}') +# learner = LowRankLogisticRegression(**newLR().get_params()) +# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params + + +def quantification_ensembles(): + param_mod_sel = { + 'sample_size': SAMPLE_SIZE, + 'n_prevpoints': 21, + 'n_repetitions': 5, + 'refit': True, + 'verbose': False + } + common = { + 'size': 30, + 'red_size': 15, + 'max_sample_size': None, # same as training set + 'n_jobs': ENSEMBLE_N_JOBS, + 'param_grid': lr_params, + 'param_mod_sel': param_mod_sel, + 'val_split': 0.4, + 'min_pos': 5 + } + + # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection + # will be skipped (by setting hyperparameters to None) + hyper_none = None + yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none + + + +def run(experiment): + optim_loss, dataset_name, (model_name, model, hyperparams) = experiment + + data = qp.datasets.fetch_reviews(dataset_name, tfidf=True, min_df=5) + run=0 + + if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss): + print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') + return + + print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}') + # model selection (hyperparameter optimization for a quantification-oriented loss) + if hyperparams is not None: + model_selection = qp.model_selection.GridSearchQ( + model, + param_grid=hyperparams, + sample_size=SAMPLE_SIZE, + n_prevpoints=21, + n_repetitions=100, + error=optim_loss, + refit=True, + timeout=60 * 60, + verbose=True + ) + model_selection.fit(data.training) + model = model_selection.best_model() + best_params = model_selection.best_params_ + else: + model.fit(data.training) + best_params = {} + + # model evaluation + true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction( + 
model, + test=data.test, + sample_size=SAMPLE_SIZE, + n_prevpoints=21, + n_repetitions=100, + n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1, + verbose=True + ) + test_true_prevalence = data.test.prevalence() + + evaluate_experiment(true_prevalences, estim_prevalences) + save_results(args.results, dataset_name, model_name, run, optim_loss, + true_prevalences, estim_prevalences, + data.training.prevalence(), test_true_prevalence, + best_params) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') + parser.add_argument('results', metavar='RESULT_PATH', type=str, + help='path to the directory where to store the results') + parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification', + help='path to the directory with svmperf') + parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint', + help='path to the directory where to dump QuaNet checkpoints') + args = parser.parse_args() + + print(f'Result folder: {args.results}') + np.random.seed(0) + + qp.environ['SVMPERF_HOME'] = args.svmperfpath + + optim_losses = ['mae'] + datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS + + models = quantification_models() + qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) + + # models = quantification_cuda_models() + # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS) + + # models = quantification_ensembles() + # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1) + + shutil.rmtree(args.checkpointdir, ignore_errors=True) diff --git a/NewMethods/tc_tables.py b/NewMethods/tc_tables.py new file mode 100644 index 0000000..3aa7ea0 --- /dev/null +++ b/NewMethods/tc_tables.py @@ -0,0 +1,70 @@ +import quapy as qp +import numpy as np +from os import makedirs +import sys, os +import pickle +import argparse +from common import * +from tc_experiments import * +from tabular import Table +import itertools + +tables_path = './tables_reviews' +MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results + +makedirs(tables_path, exist_ok=True) + +qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE + + + +METHODS = ['cc', 'acc', 'pcc', + 'pacc', + 'wpacc', + # 'MAX', 'MS', 'MS2', + 'sldc', + # 'svmmae', + # 'hdy', + # 'ehdymaeds', + # 'EMdiag', 'EMfull', 'EMtied', 'EMspherical', + # 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical', + ] + + +if __name__ == '__main__': + results = 'results_reviews' + + datasets = qp.datasets.REVIEWS_SENTIMENT_DATASETS + evaluation_measures = [qp.error.ae] + + run=0 + for i, eval_func in enumerate(evaluation_measures): + eval_name = eval_func.__name__ + + # Tables evaluation scores for the evaluation measure + # ---------------------------------------------------- + # fill data table + table = Table(benchmarks=datasets, methods=METHODS) + for dataset, method in itertools.product(datasets, METHODS): + table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name)) + + # write the latex table + nmethods = len(METHODS) + tabular = """ + \\resizebox{\\textwidth}{!}{% + \\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline + & \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline + """ + rowreplace={dataset: nicename(dataset) for dataset in datasets} + colreplace={method: nicename(method, eval_name, side=True) for method in 
METHODS} + + tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace) + tabular += 'Rank Average & ' + table.getRankTable().latexAverage() + tabular += """ + \end{tabular}% + } + """ + + save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular) + + print("[Done]") \ No newline at end of file diff --git a/NewMethods/uci_experiments.py b/NewMethods/uci_experiments.py new file mode 100644 index 0000000..098c88b --- /dev/null +++ b/NewMethods/uci_experiments.py @@ -0,0 +1,172 @@ +from class_weight_model import ClassWeightPCC +# from classification.methods import LowRankLogisticRegression +# from method.experimental import ExpMax, VarExpMax +from common import * +from method.meta import QuaNet +from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy +from quapy.method.meta import EHDy +import numpy as np +import os +import pickle +import itertools +import argparse +import torch +import shutil + + +SAMPLE_SIZE = 100 + +N_FOLDS = 5 +N_REPEATS = 1 + +N_JOBS = -1 +CUDA_N_JOBS = 2 +ENSEMBLE_N_JOBS = -1 + +qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE + +__C_range = np.logspace(-3, 3, 7) +lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']} +svmperf_params = {'C': __C_range} + + +def quantification_models(): + # yield 'cc', CC(newLR()), lr_params + # yield 'acc', ACC(newLR()), lr_params + yield 'pcc.opt', PCC(newLR()), lr_params + yield 'pacc.opt', PACC(newLR()), lr_params + yield 'wpacc.opt', ClassWeightPCC(), lr_params + # yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params # this cannot work in its current version (see notes in the class_weight_model.py file) + # yield 'MAX', MAX(newLR()), lr_params + # yield 'MS', MS(newLR()), lr_params + # yield 'MS2', MS2(newLR()), lr_params + yield 'sldc', EMQ(calibratedLR()), lr_params + # yield 'svmmae', SVMAE(), svmperf_params + # yield 'hdy', HDy(newLR()), lr_params + # yield 'EMdiag', ExpMax(cov_type='diag'), None + # yield 'EMfull', ExpMax(cov_type='full'), None + # yield 'EMtied', ExpMax(cov_type='tied'), None + # yield 'EMspherical', ExpMax(cov_type='spherical'), None + # yield 'VEMdiag', VarExpMax(cov_type='diag'), None + # yield 'VEMfull', VarExpMax(cov_type='full'), None + # yield 'VEMtied', VarExpMax(cov_type='tied'), None + # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None + + +# def quantification_cuda_models(): +# device = 'cuda' if torch.cuda.is_available() else 'cpu' +# print(f'Running QuaNet in {device}') +# learner = LowRankLogisticRegression(**newLR().get_params()) +# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params + + +def quantification_ensembles(): + param_mod_sel = { + 'sample_size': SAMPLE_SIZE, + 'n_prevpoints': 21, + 'n_repetitions': 5, + 'refit': True, + 'verbose': False + } + common = { + 'size': 30, + 'red_size': 15, + 'max_sample_size': None, # same as training set + 'n_jobs': ENSEMBLE_N_JOBS, + 'param_grid': lr_params, + 'param_mod_sel': param_mod_sel, + 'val_split': 0.4, + 'min_pos': 5 + } + + # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection + # will be skipped (by setting hyperparameters to None) + hyper_none = None + yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none + + +def run(experiment): + optim_loss, dataset_name, (model_name, model, hyperparams) = experiment + if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return + + collection = 
qp.datasets.fetch_UCILabelledCollection(dataset_name) + for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=N_FOLDS, nrepeats=N_REPEATS)): + if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss): + print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') + continue + + print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}') + # model selection (hyperparameter optimization for a quantification-oriented loss) + if hyperparams is not None: + model_selection = qp.model_selection.GridSearchQ( + model, + param_grid=hyperparams, + sample_size=SAMPLE_SIZE, + n_prevpoints=21, + n_repetitions=25, + error=optim_loss, + refit=True, + timeout=60 * 60, + verbose=True + ) + model_selection.fit(data.training) + model = model_selection.best_model() + best_params = model_selection.best_params_ + else: + model.fit(data.training) + best_params = {} + + if hasattr(model, "deploy"): + model.deploy() + + # model evaluation + true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction( + model, + test=data.test, + sample_size=SAMPLE_SIZE, + n_prevpoints=21, + n_repetitions=100, + n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1 + ) + test_true_prevalence = data.test.prevalence() + + if hasattr(model, "deploy"): + model.deploy(False) + + evaluate_experiment(true_prevalences, estim_prevalences) + save_results(args.results, dataset_name, model_name, run, optim_loss, + true_prevalences, estim_prevalences, + data.training.prevalence(), test_true_prevalence, + best_params) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification') + parser.add_argument('results', metavar='RESULT_PATH', type=str, + help='path to the directory where to store the results') + parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification', + help='path to the directory with svmperf') + parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint', + help='path to the directory where to dump QuaNet checkpoints') + args = parser.parse_args() + + print(f'Result folder: {args.results}') + np.random.seed(0) + + qp.environ['SVMPERF_HOME'] = args.svmperfpath + + optim_losses = ['mae'] + datasets = qp.datasets.UCI_DATASETS + + models = quantification_models() + # for runargs in itertools.product(optim_losses, datasets, models): + # run(runargs) + qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) + + # models = quantification_cuda_models() + # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS) + + # models = quantification_ensembles() + # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1) + + shutil.rmtree(args.checkpointdir, ignore_errors=True) diff --git a/NewMethods/uci_plots.py b/NewMethods/uci_plots.py new file mode 100644 index 0000000..9705f27 --- /dev/null +++ b/NewMethods/uci_plots.py @@ -0,0 +1,83 @@ +import quapy as qp +import os +import pathlib +import pickle +from glob import glob +import sys +from uci_common import * +from os.path import join + + +qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE +plotext='png' + +resultdir = './results_uci' +plotdir = './plots_uci' +os.makedirs(plotdir, exist_ok=True) + +N_RUNS = N_FOLDS * N_REPEATS + +def gather_results(methods, error_name): + method_names, true_prevs, estim_prevs, tr_prevs = [], [], 
[], [] + for method in methods: + for run in range(N_RUNS): + for experiment in glob(f'{resultdir}/*-{method}-run{run}-m{error_name}.pkl'): + true_prevalences, estim_prevalences, tr_prev, te_prev, best_params = pickle.load(open(experiment, 'rb')) + method_names.append(nicename(method)) + true_prevs.append(true_prevalences) + estim_prevs.append(estim_prevalences) + tr_prevs.append(tr_prev) + return method_names, true_prevs, estim_prevs, tr_prevs + + +def plot_error_by_drift(methods, error_name, logscale=False, path=None): + print('plotting error by drift') + if path is not None: + path = join(path, f'error_by_drift_{error_name}.{plotext}') + method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name) + qp.plot.error_by_drift( + method_names, + true_prevs, + estim_prevs, + tr_prevs, + n_bins=20, + error_name=error_name, + show_std=False, + logscale=logscale, + title=f'Quantification error as a function of distribution shift', + savepath=path + ) + + +def diagonal_plot(methods, error_name, path=None): + print('plotting diagonal plots') + if path is not None: + path = join(path, f'diag_{error_name}') + method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name) + qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, show_std=False, savepath=f'{path}_pos.{plotext}') + + +def binary_bias_global(methods, error_name, path=None): + print('plotting bias global') + if path is not None: + path = join(path, f'globalbias_{error_name}') + method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name) + qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', savepath=f'{path}_pos.{plotext}') + + +def binary_bias_bins(methods, error_name, path=None): + print('plotting bias local') + if path is not None: + path = join(path, f'localbias_{error_name}') + method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name) + qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}') + + + +plot_error_by_drift(METHODS, error_name='ae', path=plotdir) + +diagonal_plot(METHODS, error_name='ae', path=plotdir) + +binary_bias_global(METHODS, error_name='ae', path=plotdir) + +binary_bias_bins(METHODS, error_name='ae', path=plotdir) diff --git a/NewMethods/uci_tables.py b/NewMethods/uci_tables.py new file mode 100644 index 0000000..dd076ce --- /dev/null +++ b/NewMethods/uci_tables.py @@ -0,0 +1,79 @@ +import quapy as qp +import numpy as np +from os import makedirs +import sys, os +import pickle +import argparse +from common import * +from uci_experiments import result_path +from tabular import Table +from uci_experiments import * +import itertools + +tables_path = './tables_uci' +MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results + +makedirs(tables_path, exist_ok=True) + +qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE + + + + +METHODS = [#'cc', 'acc', + 'pcc', + 'pacc', + 'wpacc', + 'pcc.opt', + 'pacc.opt', + 'wpacc.opt', + 'wpacc.opt2', + # 'MAX', 'MS', 'MS2', + 'sldc', + # 'svmmae', + # 'hdy', + # 'ehdymaeds', + # 'EMdiag', 'EMfull', 'EMtied', 'EMspherical', + # 'VEMdiag', 'VEMfull', 'VEMtied', 'VEMspherical', + ] + + +if __name__ == '__main__': + results = 'results_uci' + + datasets = qp.datasets.UCI_DATASETS + datasets.remove('acute.a') + datasets.remove('acute.b') + datasets.remove('iris.1') + 
evaluation_measures = [qp.error.ae, qp.error.rae, qp.error.kld] + + for i, eval_func in enumerate(evaluation_measures): + eval_name = eval_func.__name__ + + # Tables evaluation scores for the evaluation measure + # ---------------------------------------------------- + # fill data table + table = Table(benchmarks=datasets, methods=METHODS) + for dataset, method, run in itertools.product(datasets, METHODS, range(N_FOLDS*N_REPEATS)): + table.add(dataset, method, experiment_errors(results, dataset, method, run, eval_name, optim_loss='ae')) + + # write the latex table + nmethods = len(METHODS) + tabular = """ + \\resizebox{\\textwidth}{!}{% + \\begin{tabular}{|c||""" + ('c|' * nmethods) + '|' + """} \hline + & \multicolumn{""" + str(nmethods) + """}{c||}{Quantification methods} \\\\ \hline + """ + rowreplace={dataset: nicename(dataset) for dataset in datasets} + colreplace={method: nicename(method, eval_name, side=True) for method in METHODS} + + tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace) + tabular += 'Rank Average & ' + table.getRankTable().latexAverage() + tabular += """ + \end{tabular}% + } + """ + + save_table(f'{tables_path}/tab_results_{eval_name}.tex', tabular) + + print("[Done]") \ No newline at end of file diff --git a/quapy/util.py b/quapy/util.py index ab205aa..13555be 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -41,9 +41,9 @@ def parallel(func, args, n_jobs): ) that takes the quapy.environ variable as input silently """ - def func_dec(environ, *args): + def func_dec(environ, *args_i): qp.environ = environ - return func(*args) + return func(*args_i) return Parallel(n_jobs=n_jobs)( delayed(func_dec)(qp.environ, args_i) for args_i in args )
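A note on the class_weight computation in ClassWeightPCC: the hunk only shows the commented-out `compute_class_weight` call and the final `return {0: weights[0], 1: weights[1]}`, so the exact derivation of `weights` lies outside the visible context. The sketch below is a minimal, hypothetical version of such a helper (the function name, the clipping, and the renormalization are assumptions), mirroring scikit-learn's `class_weight='balanced'` heuristic in which each class receives a weight inversely proportional to its (here, guessed) prevalence:

```python
import numpy as np


def class_weight_from_prevalence(prevalence, eps=1e-6):
    """Hypothetical helper: map a guessed prevalence vector (p_neg, p_pos) to a dict
    usable as LogisticRegression(class_weight=...). It mirrors scikit-learn's
    'balanced' heuristic, in which w_c is proportional to 1 / p_c."""
    p = np.clip(np.asarray(prevalence, dtype=float), eps, None)
    weights = 1. / (len(p) * p)   # inverse-prevalence weights
    weights /= weights.mean()     # renormalize so the average weight is 1
    return {c: w for c, w in enumerate(weights)}


# a guessed prevalence of 20% positives up-weights the positive class:
print(class_weight_from_prevalence([0.8, 0.2]))  # ≈ {0: 0.4, 1: 1.6}
```

At test time, `ClassWeightPCC.quantify` passes such a dictionary to `LogisticRegression(class_weight=class_weight)`, up-weighting the class that the prompt quantifier (PACC) estimates to be rare.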
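The "Possible extensions" docstring proposes moving the test-time training cost to the training stage: fit one classifier per candidate class_weight ratio up front, then pick the one trained for the ratio closest to the guessed prevalence. A rough sketch of that idea for the binary case follows; the class name, the prevalence grid, and the weighting formula are illustrative, not part of the diff:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression


class PrecomputedClassWeightBank:
    """Hypothetical sketch (binary case): pre-train one classifier per candidate
    positive prevalence, then select the closest one given the prevalence guessed
    at test time, so that no training happens while quantifying."""

    def __init__(self, grid=np.linspace(0.05, 0.95, 19)):
        self.grid = grid
        self.models = {}

    def fit(self, X, y):
        for p in self.grid:
            # weight classes as if the deployment prevalence were (1-p, p)
            class_weight = {0: 1. / (2 * (1 - p)), 1: 1. / (2 * p)}
            self.models[p] = LogisticRegression(class_weight=class_weight, max_iter=1000).fit(X, y)
        return self

    def select(self, guessed_pos_prevalence):
        closest = self.grid[np.argmin(np.abs(self.grid - guessed_pos_prevalence))]
        return self.models[closest]
```

Fitting costs `len(grid)` trainings, but the test-time step reduces to a dictionary lookup, which is the trade-off the docstring suggests for costly classifiers.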
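For `tabular.py`, a minimal standalone usage sketch of the `Table` API as it appears above (the dataset names, method names, and error values are made up); it mirrors how `tc_tables.py` and `uci_tables.py` fill the table and then emit the LaTeX body and the rank-average row:

```python
import numpy as np
from tabular import Table  # NewMethods/tabular.py, added in this diff

np.random.seed(0)
datasets = ['hp', 'kindle']          # benchmarks (rows)
methods = ['pcc', 'pacc', 'wpacc']   # methods (columns)

table = Table(benchmarks=datasets, methods=methods, ttest='wilcoxon', show_std=True)
for dataset in datasets:
    for method in methods:
        # one array of per-sample errors per (benchmark, method) cell; random here
        table.add(dataset, method, np.random.rand(100) * 0.1)

# LaTeX rows with the best result in bold, significance marks, and cell colours
print(table.latexTabular())
# per-benchmark ranks averaged over benchmarks (as in tc_tables.py / uci_tables.py)
print(table.getRankTable().latexAverage())
```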
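Regarding the `quapy/util.py` hunk: the inner `func_dec(environ, *args)` previously shadowed the `args` parameter of the enclosing `parallel(func, args, n_jobs)`; since each delayed call receives a single `args_i` tuple, the behaviour appears unchanged and the rename mainly removes the shadowing. A self-contained sketch of the same environment-propagation pattern with plain joblib (the `ENV` dict and `work` function are stand-ins, not quapy code):

```python
from joblib import Parallel, delayed

ENV = {'SAMPLE_SIZE': None}  # stand-in for qp.environ in this sketch


def parallel(func, args, n_jobs):
    """Run func over args with joblib, re-installing the caller's ENV in each worker
    (worker processes only see the module-level value set at import time)."""
    def func_dec(environ, *args_i):  # 'args_i' avoids shadowing the outer 'args'
        ENV.update(environ)
        return func(*args_i)
    return Parallel(n_jobs=n_jobs)(
        delayed(func_dec)(dict(ENV), args_i) for args_i in args
    )


def work(item):
    name, value = item
    return f'{name}={value} (SAMPLE_SIZE={ENV["SAMPLE_SIZE"]})'


if __name__ == '__main__':
    ENV['SAMPLE_SIZE'] = 500
    print(parallel(work, [('a', 1), ('b', 2)], n_jobs=2))
```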