diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt
index b5e4f74..c3e1e64 100644
--- a/CHANGE_LOG.txt
+++ b/CHANGE_LOG.txt
@@ -1,5 +1,10 @@
 Change Log 0.1.9
 ----------------
 
+- [TODO] add LeQua2024
+- [TODO] add njobs to env
+- [TODO] add basic examples
+- [TODO] add default classifier to env
+
 - Moved the wiki documents to the ./docs/ folder so that they become editable via PR for the community
 - Added Composable methods from Mirko Bunse's qunfold library! (thanks to Mirko Bunse!)
diff --git a/examples/0.basics.py b/examples/0.basics.py
new file mode 100644
index 0000000..ea3a7c1
--- /dev/null
+++ b/examples/0.basics.py
@@ -0,0 +1,86 @@
+"""
+This is a basic example showcasing some of the important concepts behind quapy.
+First of all, import quapy. You would typically import quapy in the following way:
+"""
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+import quapy as qp
+
+# let's fetch some dataset to run one experiment
+# datasets are available in the "qp.data.datasets" module (there is a shortcut in qp.datasets)
+
+data = qp.datasets.fetch_reviews('hp')
+
+# The data are in plain text format. You can convert them into tfidf using some utilities available in the
+# qp.data.preprocessing module, e.g.:
+
+data = qp.data.preprocessing.text2tfidf(data, min_df=5)
+
+# you can obtain the same result by specifying tfidf=True in the fetch function:
+# data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
+
+# data is an object of type Dataset, a very basic collection that contains a "training" and a "test" collection inside.
+train, test = data.train_test
+
+# train and test are instances of LabelledCollection, a class that contains covariates (X) and true labels (y), along
+# with sampling functionality. Here are some examples of usage:
+X, y = train.Xy
+print(f'number of classes {train.n_classes}')
+print(f'class names {train.classes_}')
+
+import quapy.functional as F # <- this module has some functional utilities, like a string formatter for prevalences
+print(f'training prevalence = {F.strprev(train.prevalence())}')
+
+# let us train one quantifier, for example, PACC, using sklearn's LogisticRegression as the underlying classifier
+classifier = LogisticRegression()
+
+pacc = qp.method.aggregative.PACC(classifier)
+
+print(f'training {pacc}')
+pacc.fit(train)
+
+# let's now test our quantifier on the test data (of course, we should not use the test labels y at this point, only X)
+X_test = test.X
+estim_prevalence = pacc.quantify(X_test)
+
+print(f'estimated test prevalence = {F.strprev(estim_prevalence)}')
+print(f'true test prevalence = {F.strprev(test.prevalence())}')
+
+# let us use some evaluation metric to check how well our quantifier fared.
+# Error metrics are available in the qp.error module.
+
+mae_error = qp.error.mae(test.prevalence(), estim_prevalence)
+print(f'MAE={mae_error:.4f}')
+
+# In quantification, we typically use an evaluation protocol to test the performance of a quantification method.
+# The reason is that, even though the test set contains many instances, the whole set counts as a single datapoint for
+# the quantifier, because quantification targets samples of instances as a whole (while classification and regression
+# target instances individually).
+# Quapy provides some standard protocols in qp.protocol. We will use the artificial prevalence protocol (APP). APP
+# works by generating many test samples, out of our original test collection, characterized by different prevalence
+# values. To do so, a grid of prevalence values is explored, and different samples are generated conditioned on each
+# prevalence vector. This way, the quantifier is stress-tested on a wide range of prevalence values, i.e., under
+# prior probability shift conditions.
+
+# In this case we use "test" and not only "test.X" since the protocol needs to know the class labels in order
+# to generate samples at different prevalences. We will generate samples of 100 instances, from a grid of 21 values,
+# i.e., from a grid = [0.0, 0.05, 0.10, ..., 1.00], and only one sample (repeats=1) for each combination.
+app = qp.protocol.APP(test, sample_size=100, n_prevalences=21, repeats=1)
+
+# let's print some examples:
+show=5
+for i, (sample, prev) in enumerate(app()):
+    print(f'sample-{i}: {F.strprev(prev)}')
+    if i+1==show:
+        break
+
+# we can use the evaluation routine provided in quapy to test our method using a given protocol in terms of
+# one specific error metric
+absolute_errors = qp.evaluation.evaluate(model=pacc, protocol=app, error_metric='ae')
+print(f'MAE = {np.mean(absolute_errors):.4f}+-{np.std(absolute_errors):.4f}')
+
+
+
+
diff --git a/examples/model_selection.py b/examples/1.model_selection.py
similarity index 100%
rename from examples/model_selection.py
rename to examples/1.model_selection.py
diff --git a/examples/one_vs_all.py b/examples/10.one_vs_all.py
similarity index 100%
rename from examples/one_vs_all.py
rename to examples/10.one_vs_all.py
diff --git a/examples/comparing_HDy_HDx.py b/examples/11.comparing_HDy_HDx.py
similarity index 96%
rename from examples/comparing_HDy_HDx.py
rename to examples/11.comparing_HDy_HDx.py
index 9ee3190..7d96b6a 100644
--- a/examples/comparing_HDy_HDx.py
+++ b/examples/11.comparing_HDy_HDx.py
@@ -25,7 +25,9 @@ df = pd.DataFrame(columns=['method', 'dataset', 'MAE', 'MRAE', 'tr-time', 'te-ti
 
 for dataset_name in tqdm(qp.datasets.UCI_BINARY_DATASETS, total=len(qp.datasets.UCI_BINARY_DATASETS)):
 
-    if dataset_name in ['acute.a', 'acute.b', 'balance.2', 'iris.1']: continue
+    if dataset_name in ['acute.a', 'acute.b', 'balance.2', 'iris.1']:
+        # these datasets tend to produce either too good or too bad results...
+        continue
 
     collection = qp.datasets.fetch_UCIBinaryLabelledCollection(dataset_name, verbose=False)
     train, test = collection.split_stratified()
diff --git a/examples/custom_protocol.py b/examples/12.custom_protocol.py
similarity index 100%
rename from examples/custom_protocol.py
rename to examples/12.custom_protocol.py
diff --git a/examples/bayesian_quantification.py b/examples/13.bayesian_quantification.py
similarity index 99%
rename from examples/bayesian_quantification.py
rename to examples/13.bayesian_quantification.py
index 2d0f4ed..a4ec1fc 100644
--- a/examples/bayesian_quantification.py
+++ b/examples/13.bayesian_quantification.py
@@ -13,7 +13,7 @@ $ pip install quapy[bayesian]
 
 Running the script via:
 ```
-$ python examples/bayesian_quantification.py
+$ python examples/13.bayesian_quantification.py
 ```
 
 will produce a plot `bayesian_quantification.pdf`.
diff --git a/examples/custom_quantifier.py b/examples/2.custom_quantifier.py
similarity index 100%
rename from examples/custom_quantifier.py
rename to examples/2.custom_quantifier.py
diff --git a/examples/lequa2022_experiments.py b/examples/4.lequa2022_experiments.py
similarity index 100%
rename from examples/lequa2022_experiments.py
rename to examples/4.lequa2022_experiments.py
diff --git a/examples/explicit_loss_minimization.py b/examples/5.explicit_loss_minimization.py
similarity index 98%
rename from examples/explicit_loss_minimization.py
rename to examples/5.explicit_loss_minimization.py
index fcc07f3..f8f210d 100644
--- a/examples/explicit_loss_minimization.py
+++ b/examples/5.explicit_loss_minimization.py
@@ -33,7 +33,7 @@ returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with
 Since we want to explore the losses, we will instead use newELM. For this example we will create a quantifier for tweet
 sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary classifier,
 our quantifier will be binary as well. We will use a one-vs-all approach to work in multiclass mode.
-For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
+For more details about how one-vs-all works, we refer to the example "10.one_vs_all.py" and to the API documentation.
 """
 
 qp.environ['SAMPLE_SIZE'] = 100
diff --git a/examples/quanet_example.py b/examples/6.quanet_example.py
similarity index 100%
rename from examples/quanet_example.py
rename to examples/6.quanet_example.py
diff --git a/examples/uci_experiments.py b/examples/7.uci_experiments.py
similarity index 100%
rename from examples/uci_experiments.py
rename to examples/7.uci_experiments.py
diff --git a/examples/ucimulti_experiments.py b/examples/8.ucimulti_experiments.py
similarity index 100%
rename from examples/ucimulti_experiments.py
rename to examples/8.ucimulti_experiments.py
diff --git a/examples/ifcb_experiments.py b/examples/9.ifcb_experiments.py
similarity index 100%
rename from examples/ifcb_experiments.py
rename to examples/9.ifcb_experiments.py
diff --git a/examples/lequa2022_experiments_recalib.py b/examples/lequa2022_experiments_recalib.py
deleted file mode 100644
index a5a0e05..0000000
--- a/examples/lequa2022_experiments_recalib.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import numpy as np
-from abstention.calibration import NoBiasVectorScaling, VectorScaling, TempScaling
-from sklearn.calibration import CalibratedClassifierCV
-from sklearn.linear_model import LogisticRegression
-import quapy as qp
-import quapy.functional as F
-from classification.calibration import RecalibratedProbabilisticClassifierBase, NBVSCalibration, \
-    BCTSCalibration
-from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
-from evaluation import evaluation_report
-from method.aggregative import EMQ
-from model_selection import GridSearchQ
-import pandas as pd
-
-for task in ['T1A', 'T1B']:
-
-    # calibration = TempScaling(verbose=False, bias_positions='all')
-
-    qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]
-    training, val_generator, test_generator = fetch_lequa2022(task=task)
-
-    # define the quantifier
-    # learner = BCTSCalibration(LogisticRegression(), n_jobs=-1)
-    # learner = CalibratedClassifierCV(LogisticRegression())
-    learner = LogisticRegression()
-    quantifier = EMQ(classifier=learner)
-
-    # model selection
-    param_grid = {
-        'classifier__C': np.logspace(-3, 3, 7),
-        'classifier__class_weight': ['balanced', None],
-        'recalib': ['platt', 'ts', 'vs', 'nbvs', 'bcts', None],
-        'exact_train_prev': [False, True]
-    }
-    model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', n_jobs=-1, refit=False, verbose=True)
-    quantifier = model_selection.fit(training)
-
-    # evaluation
-    report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
-
-    # import os
-    # os.makedirs(f'./out', exist_ok=True)
-    # with open(f'./out/EMQ_{calib}_{task}.txt', 'wt') as foo:
-    #     estim_prev = report['estim-prev'].values
-    #     nclasses = len(estim_prev[0])
-    #     foo.write(f'id,'+','.join([str(x) for x in range(nclasses)])+'\n')
-    #     for id, prev in enumerate(estim_prev):
-    #         foo.write(f'{id},'+','.join([f'{p:.5f}' for p in prev])+'\n')
-    #
-    # #os.makedirs(f'./errors/{task}', exist_ok=True)
-    # with open(f'./out/EMQ_{calib}_{task}_errors.txt', 'wt') as foo:
-    #     maes, mraes = report['mae'].values, report['mrae'].values
-    #     foo.write(f'id,AE,RAE\n')
-    #     for id, (ae_i, rae_i) in enumerate(zip(maes, mraes)):
-    #         foo.write(f'{id},{ae_i:.5f},{rae_i:.5f}\n')
-
-    # printing results
-    pd.set_option('display.expand_frame_repr', False)
-    report['estim-prev'] = report['estim-prev'].map(F.strprev)
-    print(report)
-
-    print('Averaged values:')
-    print(report.mean())
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 5e58250..63a179e 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -791,7 +791,7 @@ def fetch_lequa2022(task, data_home=None):
 
     The datasets are downloaded only once, and stored for fast reuse.
 
-    See `lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these
+    See `4.lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these
     datasets.
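For reference (not part of the patch): with the recalibration example removed and the docstring now pointing to `4.lequa2022_experiments.py`, the following is a minimal sketch of how the LeQua 2022 data can be used. It relies only on calls that already appear in this diff (`fetch_lequa2022`, `LEQUA2022_SAMPLE_SIZE`, `EMQ`, `qp.evaluation.evaluate`); the variable names and the choice of `task='T1A'` are illustrative, and no model selection or classifier recalibration is performed.

```
import numpy as np
from sklearn.linear_model import LogisticRegression

import quapy as qp
from quapy.data.datasets import fetch_lequa2022, LEQUA2022_SAMPLE_SIZE
from quapy.method.aggregative import EMQ

task = 'T1A'  # binary task; 'T1B' is the multiclass counterpart

# the environment sample size must match the size of the official LeQua samples
qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]

# training is a LabelledCollection; val_generator and test_generator are sampling protocols
training, val_generator, test_generator = fetch_lequa2022(task=task)

# a simple quantifier: EMQ on top of a logistic regressor
quantifier = EMQ(classifier=LogisticRegression())
quantifier.fit(training)

# evaluate over the test samples in terms of mean relative absolute error
errors = qp.evaluation.evaluate(model=quantifier, protocol=test_generator, error_metric='mrae')
print(f'MRAE = {np.mean(errors):.4f}+-{np.std(errors):.4f}')
```

Model selection over `val_generator` (e.g., with `GridSearchQ`, as in the deleted script above) can be layered on top of this basic pipeline.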