diff --git a/ClassifierAccuracy/experiments.py b/ClassifierAccuracy/experiments.py
index 6af1f16..3234e99 100644
--- a/ClassifierAccuracy/experiments.py
+++ b/ClassifierAccuracy/experiments.py
@@ -12,7 +12,7 @@ if PROBLEM == 'binary':
     gen_datasets = gen_bin_datasets
 elif PROBLEM == 'multiclass':
     qp.environ['SAMPLE_SIZE'] = 250
-    NUM_TEST = 100
+    NUM_TEST = 1000
     gen_datasets = gen_multi_datasets
 
 
@@ -34,13 +34,14 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
     # instances of ClassifierAccuracyPrediction are bound to the evaluation measure, so they
     # must be nested in the acc-for
     for acc_name, acc_fn in gen_acc_measure():
+        print(f'\tfor measure {acc_name}')
         for (method_name, method) in gen_CAP(h, acc_fn, with_oracle=ORACLE):
             result_path = getpath(basedir, cls_name, acc_name, dataset_name, method_name)
             if os.path.exists(result_path):
-                print(f'\t{method_name}-{acc_name} exists, skipping')
+                print(f'\t\t{method_name}-{acc_name} exists, skipping')
                 continue
 
-            print(f'\t{method_name} computing...')
+            print(f'\t\t{method_name} computing...')
             method, t_train = fit_method(method, V)
             estim_accs, t_test_ave = predictionsCAP(method, test_prot, ORACLE)
             save_json_result(result_path, true_accs[acc_name], estim_accs, t_train, t_test_ave)
@@ -49,10 +50,10 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
     # be nested to the predictions to speed up things
     for (method_name, method) in gen_CAP_cont_table(h):
         if not any_missing(basedir, cls_name, dataset_name, method_name):
-            print(f'\tmethod {method_name} has all results already computed. Skipping.')
+            print(f'\t\tmethod {method_name} has all results already computed. Skipping.')
             continue
 
-        print(f'\tmethod {method_name} computing...')
+        print(f'\t\tmethod {method_name} computing...')
         method, t_train = fit_method(method, V)
         estim_accs_dict, t_test_ave = predictionsCAPcont_table(method, test_prot, gen_acc_measure, ORACLE)
 
@@ -65,7 +66,6 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
 # generate diagonal plots
 print('generating plots')
 for (cls_name, _), (acc_name, _) in itertools.product(gen_classifiers(), gen_acc_measure()):
-    methods = get_method_names()
     plot_diagonal(basedir, cls_name, acc_name)
     for dataset_name, _ in gen_datasets(only_names=True):
         plot_diagonal(basedir, cls_name, acc_name, dataset_name=dataset_name)
diff --git a/ClassifierAccuracy/models_multiclass.py b/ClassifierAccuracy/models_multiclass.py
index 15b80cd..a3bc98d 100644
--- a/ClassifierAccuracy/models_multiclass.py
+++ b/ClassifierAccuracy/models_multiclass.py
@@ -329,7 +329,7 @@ class SebastianiCAP(ClassifierAccuracyPrediction):
 
 
 class PabloCAP(ClassifierAccuracyPrediction):
-    def __init__(self, h, acc_fn, q_class, n_val_samples=50, aggr='mean'):
+    def __init__(self, h, acc_fn, q_class, n_val_samples=100, aggr='mean'):
         self.h = h
         self.acc = acc_fn
         self.q = q_class(h)
@@ -434,7 +434,7 @@ class QuAcc1xN2(CAPContingencyTableQ, QuAcc):
                  add_maxinfsoft=False):
         self.h = h
         self.acc = acc
-        self.q = EmptySaveQuantifier(q_class)
+        self.q = EmptySafeQuantifier(q_class)
         self.add_X = add_X
         self.add_posteriors = add_posteriors
         self.add_maxconf = add_maxconf
@@ -490,7 +490,7 @@ class QuAccNxN(CAPContingencyTableQ, QuAcc):
             y_i = true_labels[pred_labels==class_i]
             data_i = LabelledCollection(X_dot_i, y_i, classes=val.classes_)
 
-            q_i = EmptySaveQuantifier(deepcopy(self.q_class))
+            q_i = EmptySafeQuantifier(deepcopy(self.q_class))
             q_i.fit(data_i)
             self.q.append(q_i)
 
@@ -518,7 +518,7 @@ def safehstack(X, P):
     return XP
 
 
-class EmptySaveQuantifier(BaseQuantifier):
+class EmptySafeQuantifier(BaseQuantifier):
     def __init__(self, surrogate_quantifier: BaseQuantifier):
         self.surrogate = surrogate_quantifier
 
@@ -616,11 +616,12 @@ class ATC(ClassifierAccuracyPrediction):
 
 
 class DoC(ClassifierAccuracyPrediction):
-    def __init__(self, h, acc, sample_size, num_samples=500):
+    def __init__(self, h, acc, sample_size, num_samples=500, clip_vals=(0,1)):
         self.h = h
         self.acc = acc
         self.sample_size = sample_size
         self.num_samples = num_samples
+        self.clip_vals = clip_vals
 
     def _get_post_stats(self, X, y):
         P = get_posteriors_from_h(self.h, X)
@@ -660,6 +661,8 @@ class DoC(ClassifierAccuracyPrediction):
         P = get_posteriors_from_h(self.h, X)
         mc = max_conf(P)
         acc_pred = self.predict_regression(mc)[0]
+        if self.clip_vals is not None:
+            acc_pred = np.clip(acc_pred, *self.clip_vals)
         return acc_pred
 
 """
diff --git a/ClassifierAccuracy/util/commons.py b/ClassifierAccuracy/util/commons.py
index 7d1a55e..2896b61 100644
--- a/ClassifierAccuracy/util/commons.py
+++ b/ClassifierAccuracy/util/commons.py
@@ -6,12 +6,14 @@ from glob import glob
 from pathlib import Path
 from time import time
 import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics import accuracy_score, f1_score
-from sklearn.datasets import fetch_rcv1
+from sklearn.datasets import fetch_rcv1, fetch_20newsgroups
 from sklearn.model_selection import GridSearchCV
 
 from ClassifierAccuracy.models_multiclass import *
+from ClassifierAccuracy.util.tabular import Table
 
 from quapy.method.aggregative import EMQ, ACC, KDEyML
 from quapy.data import LabelledCollection
@@ -41,6 +43,16 @@ def gen_multi_datasets(only_names=False)-> [str,[LabelledCollection,LabelledColl
         else:
             dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
             yield dataset_name, split(dataset)
+
+    train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'))
+    test = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes'))
+    tfidf = TfidfVectorizer(min_df=5, sublinear_tf=True)
+    Xtr = tfidf.fit_transform(train.data)
+    Xte = tfidf.transform((test.data))
+    train = LabelledCollection(instances=Xtr, labels=train.target)
+    U = LabelledCollection(instances=Xte, labels=test.target)
+    T, V = train.split_stratified(train_prop=0.5, random_state=0)
+    yield "20news", (T, V, U)
 
 
 def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
@@ -71,7 +83,7 @@ def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
     #yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
     #yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
     #yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
-    #yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
+    yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
     yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
     #yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
     yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
@@ -288,7 +300,7 @@ def get_dataset_stats(path, test_prot, L, V):
 
 
 def gen_tables(basedir, datasets):
-    from tabular import Table
+
     mock_h = LogisticRegression(),
     methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
 
@@ -313,7 +325,7 @@ def gen_tables(basedir, datasets):
     classifier = classifiers[0]
 
     for metric in [measure for measure, _ in gen_acc_measure()]:
-        table = Table(datasets, methods)
+        table = Table(datasets, methods, prec_mean=5, clean_zero=True)
         for method, dataset in itertools.product(methods, datasets):
             path = getpath(basedir, classifier, metric, dataset, method)
             if not os.path.exists(path):
diff --git a/ClassifierAccuracy/util/plotting.py b/ClassifierAccuracy/util/plotting.py
index 18ee82e..7771edc 100644
--- a/ClassifierAccuracy/util/plotting.py
+++ b/ClassifierAccuracy/util/plotting.py
@@ -16,7 +16,7 @@ def plot_diagonal(basedir, cls_name, measure_name, dataset_name='*'):
         xs.append(results[method_name]['true_acc'])
         ys.append(results[method_name]['estim_acc'])
     plotsubdir = 'all' if dataset_name=='*' else dataset_name
-    save_path = join('plots', basedir, plotsubdir, 'diagonal.png')
+    save_path = join('plots', basedir, measure_name, plotsubdir, 'diagonal.png')
     _plot_diagonal(methods, xs, ys, save_path, measure_name)
 
 
@@ -31,7 +31,7 @@ def _plot_diagonal(methods_names, true_xs, estim_ys, save_path, measure_name, ti
     plt.plot([0, 1], [0, 1], color='black', linestyle='--')
 
     for (method_name, xs, ys) in zip(methods_names, true_xs, estim_ys):
-        plt.scatter(xs, ys, label=f'{method_name}', alpha=0.6)
+        plt.scatter(xs, ys, label=f'{method_name}', alpha=0.5, linewidths=0)
 
     plt.legend()
 
diff --git a/examples/custom_quantifier.py b/examples/custom_quantifier.py
index fa014de..9c89714 100644
--- a/examples/custom_quantifier.py
+++ b/examples/custom_quantifier.py
@@ -1,33 +1,79 @@
 import quapy as qp
 from quapy.data import LabelledCollection
-from quapy.method.base import BinaryQuantifier
+from quapy.method.base import BinaryQuantifier, BaseQuantifier
 from quapy.model_selection import GridSearchQ
 from quapy.method.aggregative import AggregativeSoftQuantifier
 from quapy.protocol import APP
 import numpy as np
 from sklearn.linear_model import LogisticRegression
+from time import time
 
 
 # Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a
 # logistic regressor for generating posterior probabilities, and then applies a custom threshold value to the
 # posteriors. Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it
-# relies on posterior probabilities, it is a probabilistic-aggregative quantifier. Note also it has an
-# internal hyperparameter (let say, alpha) which is the decision threshold. Let's also assume the quantifier
-# is binary, for simplicity.
+# relies on posterior probabilities, it is a probabilistic-aggregative quantifier (aka AggregativeSoftQuantifier).
+# Note also that it has an internal hyperparameter (let's say, alpha) which is the decision threshold.
+#
+# Let's also assume the quantifier is binary, for simplicity. Any quantifier (i.e., any subclass of BaseQuantifier)
+# is required to implement the "fit" and "quantify" methods. Aggregative quantifiers are special subtypes of base
+# quantifiers, i.e., quantifiers that undertake a classification phase followed by an aggregation phase. QuaPy
+# already implements most of the common functionality, and requires the developer to simply implement the
+# "aggregation_fit" and "aggregate" methods.
+#
+# We are providing two implementations of the same method to illustrate this characteristic of QuaPy. Let us begin
+# with the general case, in which we implement a (base) quantifier.
+
+class MyQuantifier(BaseQuantifier):
 
-class MyQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
     def __init__(self, classifier, alpha=0.5):
         self.alpha = alpha
-        # aggregative quantifiers have an internal self.classifier attribute
         self.classifier = classifier
 
-    def fit(self, data: LabelledCollection, fit_classifier=True):
-        assert fit_classifier, 'this quantifier needs to fit the classifier!'
+    # in general, we would need to implement the method fit(self, data: LabelledCollection, fit_classifier=True,
+    # val_split=None); this would amount to:
+    def fit(self, data: LabelledCollection):
+        assert data.n_classes==2, \
+            'this quantifier is only valid for binary problems [abort]'
         self.classifier.fit(*data.Xy)
         return self
 
-    # in general, we would need to implement the method quantify(self, instances) but, since this method is of
-    # type aggregative, we can simply implement the method aggregate, which has the following interface
+    # in general, we would need to implement the method quantify(self, instances); this would amount to:
+    def quantify(self, instances):
+        assert hasattr(self.classifier, 'predict_proba'), \
+            'the underlying classifier is not probabilistic! [abort]'
+        posterior_probabilities = self.classifier.predict_proba(instances)
+        positive_probabilities = posterior_probabilities[:, 1]
+        crisp_decisions = positive_probabilities > self.alpha
+        pos_prev = crisp_decisions.mean()
+        neg_prev = 1 - pos_prev
+        return np.asarray([neg_prev, pos_prev])
+
+
+# Note that the above implementation contains a lot of boilerplate code. Many parts can be omitted since QuaPy
+# provides implementations for them. Some of these routines (like, for example, training a classifier and generating
+# posterior probabilities) are often carried out in a k-fold cross-validation manner. These, along with many other
+# common routines, are already provided by QuaPy in highly optimized form. Let's see a much better implementation
+# of the method, this time adhering to the AggregativeSoftQuantifier interface:
+
+class MyAggregativeSoftQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
+    def __init__(self, classifier, alpha=0.5):
+        # aggregative quantifiers have an internal attribute called self.classifier
+        self.classifier = classifier
+        self.alpha = alpha
+
+    # since this method is of type aggregative, we can simply implement the method aggregation_fit, which
+    # assumes the classifier has already been fitted properly and that the predictions for the training set required
+    # to train the aggregation function have been properly generated (i.e., on a validation split, or using a
+    # k-fold cross-validation strategy). What remains to be done is to learn the aggregation function. In our case
+    # this amounts to doing... nothing, since our method is pretty basic. BinaryQuantifier also adds some
+    # basic functionality for checking binary consistency.
+    def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
+        pass
+
+    # since this method is of type aggregative, we can simply implement the method aggregate (i.e., we should
+    # only describe what to do with the classifier predictions, which in this case are posterior probabilities
+    # because we are inheriting from the "Soft" subtype). This comes down to:
     def aggregate(self, classif_predictions: np.ndarray):
         # the posterior probabilities have already been generated by the quantify method; we only need to
         # specify what to do with them
@@ -38,31 +84,68 @@ class MyQuantifier(AggregativeSoftQuantifier, BinaryQuantifier):
         return np.asarray([neg_prev, pos_prev])
 
 
+
+# a small example using these two implementations of our method
+
 if __name__ == '__main__':
-    qp.environ['SAMPLE_SIZE'] = 100
-
-    # define an instance of our custom quantifier
-    quantifier = MyQuantifier(LogisticRegression(), alpha=0.5)
+    qp.environ['SAMPLE_SIZE'] = 250
 
     # load the IMDb dataset
     train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
+    train, val = train.split_stratified(train_prop=0.75)  # let's create a validation set for optimizing hyperparams
 
-    # model selection
-    # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier
-    train, val = train.split_stratified(train_prop=0.75)
-    param_grid = {
-        'alpha': np.linspace(0, 1, 11),  # quantifier-dependent hyperparameter
-        'classifier__C': np.logspace(-2, 2, 5)  # classifier-dependent hyperparameter
-    }
-    quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train)
+    def test_implementation(quantifier):
+        class_name = quantifier.__class__.__name__
+        print(f'\ntesting implementation {class_name}...')
+        # model selection
+        # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier
+        tinit = time()
+        param_grid = {
+            'alpha': np.linspace(0, 1, 11),  # quantifier-dependent hyperparameter
+            'classifier__C': np.logspace(-2, 2, 5)  # classifier-dependent hyperparameter
+        }
+        gridsearch = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=False).fit(train)
+        t_modsel = time() - tinit
+        print(f'\tmodel selection took {t_modsel:.2f}s', flush=True)
 
-    # evaluation
-    mae = qp.evaluation.evaluate(quantifier, protocol=APP(test), error_metric='mae')
+        # evaluation
+        optimized_model = gridsearch.best_model_
+        mae = qp.evaluation.evaluate(
+            optimized_model,
+            protocol=APP(test, repeats=5000, sanity_check=None),  # disable the check, we want to generate many tests!
+            error_metric='mae',
+            verbose=True)
 
-    print(f'MAE = {mae:.4f}')
+        t_eval = time() - t_modsel - tinit
+        print(f'\tevaluation took {t_eval:.2f}s [MAE = {mae:.4f}]')
 
-    # final remarks: this method is only for demonstration purposes and makes little sense in general. The method relies
+    # define an instance of our custom quantifier and test it!
+    quantifier = MyQuantifier(LogisticRegression(), alpha=0.5)
+    test_implementation(quantifier)
+
+    # define an instance of our custom quantifier, with the second implementation, and test it!
+    quantifier = MyAggregativeSoftQuantifier(LogisticRegression(), alpha=0.5)
+    test_implementation(quantifier)
+
+    # the output should look like this:
+    """
+    testing implementation MyQuantifier...
+    model selection took 12.86s
+    predicting: 100%|██████████| 105000/105000 [00:22<00:00, 4626.30it/s]
+    evaluation took 22.75s [MAE = 0.0630]
+
+    testing implementation MyAggregativeSoftQuantifier...
+    model selection took 3.10s
+    speeding up the prediction for the aggregative quantifier, total classifications 25000 instead of 26250000
+    predicting: 100%|██████████| 105000/105000 [00:04<00:00, 22779.62it/s]
+    evaluation took 4.66s [MAE = 0.0630]
+    """
+    # Note that the first implementation is much slower, both in terms of grid-search optimization and in terms of
+    # evaluation. The reason is that QuaPy is highly optimized for aggregative quantifiers (by far, the most
+    # popular type of quantification method), thus significantly speeding up model selection and test routines.
+    # Furthermore, it is simpler to extend an aggregative type since QuaPy implements the boilerplate functions for you.
+
+    # Final remarks: this method is only for demonstration purposes and makes little sense in general. The method relies
     # on an hyperparameter alpha for binarizing the posterior probabilities. A much better way for fulfilling this
    # goal would be to calibrate the classifier (LogisticRegression is already reasonably well calibrated) and then
    # simply cut at 0.5.