From 2485117f05d2bca08f187ab2cb0b46961d4b1f2c Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 8 Feb 2023 19:06:53 +0100 Subject: [PATCH] adding documentation and adding one new example --- docs/build/html/Datasets.html | 555 +- docs/build/html/Evaluation.html | 57 +- docs/build/html/Installation.html | 57 +- docs/build/html/Methods.html | 99 +- docs/build/html/Model-Selection.html | 65 +- docs/build/html/Plotting.html | 71 +- docs/build/html/_sources/index.rst.txt | 2 +- .../_sources/quapy.classification.rst.txt | 23 +- docs/build/html/_sources/quapy.data.rst.txt | 18 +- docs/build/html/_sources/quapy.method.rst.txt | 22 +- docs/build/html/_sources/quapy.rst.txt | 58 +- docs/build/html/_sources/quapy.tests.rst.txt | 37 - docs/build/html/_sources/readme.rst.txt | 7 - docs/build/html/_sources/readme2.md.txt | 1 - docs/build/html/_static/alabaster.css | 701 - docs/build/html/_static/basic.css | 62 +- docs/build/html/_static/bizstyle.css | 2 + docs/build/html/_static/bizstyle.js | 43 +- docs/build/html/_static/custom.css | 1 - docs/build/html/_static/doctools.js | 373 +- .../html/_static/documentation_options.js | 8 +- docs/build/html/_static/jquery-3.5.1.js | 10872 ---------------- docs/build/html/_static/jquery.js | 4 +- docs/build/html/_static/language_data.js | 102 +- docs/build/html/_static/searchtools.js | 808 +- docs/build/html/genindex.html | 319 +- docs/build/html/index.html | 45 +- docs/build/html/modules.html | 78 +- docs/build/html/objects.inv | Bin 2591 -> 2873 bytes docs/build/html/py-modindex.html | 24 +- docs/build/html/quapy.classification.html | 448 +- docs/build/html/quapy.data.html | 482 +- docs/build/html/quapy.html | 1076 +- docs/build/html/quapy.method.html | 1308 +- docs/build/html/quapy.tests.html | 135 - docs/build/html/readme.html | 129 - docs/build/html/readme2.html | 92 - docs/build/html/search.html | 12 +- docs/build/html/searchindex.js | 2 +- examples/custom_quantifier.py | 69 + quapy/CHANGE_LOG.txt | 78 +- quapy/__init__.py | 
21 +- quapy/classification/calibration.py | 77 +- quapy/classification/svmperf.py | 1 + quapy/data/datasets.py | 24 + quapy/data/preprocessing.py | 9 +- quapy/depr_evaluation.py | 439 - quapy/error.py | 5 - quapy/functional.py | 27 +- quapy/method/aggregative.py | 58 +- quapy/method/base.py | 23 +- quapy/method/meta.py | 8 +- quapy/method/non_aggregative.py | 27 - quapy/model_selection.py | 2 +- quapy/protocol.py | 69 +- quapy/tests/test_methods.py | 17 +- quapy/tests/test_modsel.py | 22 +- quapy/tests/test_protocols.py | 6 +- quapy/util.py | 18 +- 59 files changed, 3593 insertions(+), 15605 deletions(-) delete mode 100644 docs/build/html/_sources/quapy.tests.rst.txt delete mode 100644 docs/build/html/_sources/readme.rst.txt delete mode 100644 docs/build/html/_sources/readme2.md.txt delete mode 100644 docs/build/html/_static/alabaster.css delete mode 100644 docs/build/html/_static/custom.css delete mode 100644 docs/build/html/_static/jquery-3.5.1.js delete mode 100644 docs/build/html/quapy.tests.html delete mode 100644 docs/build/html/readme.html delete mode 100644 docs/build/html/readme2.html create mode 100644 examples/custom_quantifier.py delete mode 100644 quapy/depr_evaluation.py diff --git a/docs/build/html/Datasets.html b/docs/build/html/Datasets.html index 6af836e..9c9eaa7 100644 --- a/docs/build/html/Datasets.html +++ b/docs/build/html/Datasets.html @@ -2,23 +2,26 @@ - + - - Datasets — QuaPy 0.1.6 documentation + + + Datasets — QuaPy 0.1.7 documentation + + - - + + - - - -
-
-
-
- -
-

quapy.tests package

-
-

Submodules

-
-
-

quapy.tests.test_base module

-
-
-

quapy.tests.test_datasets module

-
-
-

quapy.tests.test_methods module

-
-
-

Module contents

-
-
- - -
-
-
-
- -
-
- - - - \ No newline at end of file diff --git a/docs/build/html/readme.html b/docs/build/html/readme.html deleted file mode 100644 index c223f24..0000000 --- a/docs/build/html/readme.html +++ /dev/null @@ -1,129 +0,0 @@ - - - - - - - - - Getting Started — QuaPy 0.1.6 documentation - - - - - - - - - - - - - - - - - -
-
-
-
- -
-

Getting Started

-

QuaPy is an open source framework for Quantification (a.k.a. Supervised Prevalence Estimation) written in Python.

-
-

Installation

-
>>> pip install quapy
-
-
-
-
- - -
-
-
-
- -
-
- - - - \ No newline at end of file diff --git a/docs/build/html/readme2.html b/docs/build/html/readme2.html deleted file mode 100644 index e5ff4a6..0000000 --- a/docs/build/html/readme2.html +++ /dev/null @@ -1,92 +0,0 @@ - - - - - - - - - <no title> — QuaPy 0.1.6 documentation - - - - - - - - - - - - - - - -
-
-
-
- -

.. include:: ../../README.md

- - -
-
-
-
- -
-
- - - - \ No newline at end of file diff --git a/docs/build/html/search.html b/docs/build/html/search.html index 2090979..480e246 100644 --- a/docs/build/html/search.html +++ b/docs/build/html/search.html @@ -2,11 +2,11 @@ - + - Search — QuaPy 0.1.6 documentation + Search — QuaPy 0.1.7 documentation @@ -14,7 +14,9 @@ + + @@ -37,7 +39,7 @@
  • modules |
  • - + @@ -97,13 +99,13 @@
  • modules |
  • - + \ No newline at end of file diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 2c03e3a..1f4419a 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["Datasets","Evaluation","Installation","Methods","Model-Selection","Plotting","index","modules","quapy","quapy.classification","quapy.data","quapy.method"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["Datasets.md","Evaluation.md","Installation.rst","Methods.md","Model-Selection.md","Plotting.md","index.rst","modules.rst","quapy.rst","quapy.classification.rst","quapy.data.rst","quapy.method.rst"],objects:{"":{quapy:[8,0,0,"-"]},"quapy.classification":{methods:[9,0,0,"-"],neural:[9,0,0,"-"],svmperf:[9,0,0,"-"]},"quapy.classification.methods":{LowRankLogisticRegression:[9,1,1,""]},"quapy.classification.methods.LowRankLogisticRegression":{fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural":{CNNnet:[9,1,1,""],LSTMnet:[9,1,1,""],NeuralClassifierTrainer:[9,1,1,""],TextClassifierNet:[9,1,1,""],TorchDataset:[9,1,1,""]},"quapy.classification.neural.CNNnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.LSTMnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.NeuralClassifierTrainer":{device:[9,3,1,""],fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],reset_net_params:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural.TextClassifierNet":{dimensions:[9,2,1,""],document_embedding:[9,2,1,""],forward:[9,2
,1,""],get_params:[9,2,1,""],predict_proba:[9,2,1,""],vocabulary_size:[9,3,1,""],xavier_uniform:[9,2,1,""]},"quapy.classification.neural.TorchDataset":{asDataloader:[9,2,1,""]},"quapy.classification.svmperf":{SVMperf:[9,1,1,""]},"quapy.classification.svmperf.SVMperf":{decision_function:[9,2,1,""],fit:[9,2,1,""],predict:[9,2,1,""],set_params:[9,2,1,""],valid_losses:[9,4,1,""]},"quapy.data":{base:[10,0,0,"-"],datasets:[10,0,0,"-"],preprocessing:[10,0,0,"-"],reader:[10,0,0,"-"]},"quapy.data.base":{Dataset:[10,1,1,""],LabelledCollection:[10,1,1,""],isbinary:[10,5,1,""]},"quapy.data.base.Dataset":{SplitStratified:[10,2,1,""],binary:[10,3,1,""],classes_:[10,3,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],stats:[10,2,1,""],vocabulary_size:[10,3,1,""]},"quapy.data.base.LabelledCollection":{Xy:[10,3,1,""],artificial_sampling_generator:[10,2,1,""],artificial_sampling_index_generator:[10,2,1,""],binary:[10,3,1,""],counts:[10,2,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],natural_sampling_generator:[10,2,1,""],natural_sampling_index_generator:[10,2,1,""],prevalence:[10,2,1,""],sampling:[10,2,1,""],sampling_from_index:[10,2,1,""],sampling_index:[10,2,1,""],split_stratified:[10,2,1,""],stats:[10,2,1,""],uniform_sampling:[10,2,1,""],uniform_sampling_index:[10,2,1,""]},"quapy.data.datasets":{fetch_UCIDataset:[10,5,1,""],fetch_UCILabelledCollection:[10,5,1,""],fetch_reviews:[10,5,1,""],fetch_twitter:[10,5,1,""],warn:[10,5,1,""]},"quapy.data.preprocessing":{IndexTransformer:[10,1,1,""],index:[10,5,1,""],reduce_columns:[10,5,1,""],standardize:[10,5,1,""],text2tfidf:[10,5,1,""]},"quapy.data.preprocessing.IndexTransformer":{add_word:[10,2,1,""],fit:[10,2,1,""],fit_transform:[10,2,1,""],transform:[10,2,1,""],vocabulary_size:[10,2,1,""]},"quapy.data.reader":{binarize:[10,5,1,""],from_csv:[10,5,1,""],from_sparse:[10,5,1,""],from_text:[10,5,1,""],reindex_labels:[10,5,1,""]},"quapy.error":{absolute_error:[8,5,1,""],acc_error:[8,5,1,""],acce:[8,5,1,""],ae:[8,5
,1,""],f1_error:[8,5,1,""],f1e:[8,5,1,""],from_name:[8,5,1,""],kld:[8,5,1,""],mae:[8,5,1,""],mean_absolute_error:[8,5,1,""],mean_relative_absolute_error:[8,5,1,""],mkld:[8,5,1,""],mnkld:[8,5,1,""],mrae:[8,5,1,""],mse:[8,5,1,""],nkld:[8,5,1,""],rae:[8,5,1,""],relative_absolute_error:[8,5,1,""],se:[8,5,1,""],smooth:[8,5,1,""]},"quapy.evaluation":{artificial_prevalence_prediction:[8,5,1,""],artificial_prevalence_protocol:[8,5,1,""],artificial_prevalence_report:[8,5,1,""],evaluate:[8,5,1,""],gen_prevalence_prediction:[8,5,1,""],gen_prevalence_report:[8,5,1,""],natural_prevalence_prediction:[8,5,1,""],natural_prevalence_protocol:[8,5,1,""],natural_prevalence_report:[8,5,1,""]},"quapy.functional":{HellingerDistance:[8,5,1,""],adjusted_quantification:[8,5,1,""],artificial_prevalence_sampling:[8,5,1,""],get_nprevpoints_approximation:[8,5,1,""],normalize_prevalence:[8,5,1,""],num_prevalence_combinations:[8,5,1,""],prevalence_from_labels:[8,5,1,""],prevalence_from_probabilities:[8,5,1,""],prevalence_linspace:[8,5,1,""],strprev:[8,5,1,""],uniform_prevalence_sampling:[8,5,1,""],uniform_simplex_sampling:[8,5,1,""]},"quapy.method":{aggregative:[11,0,0,"-"],base:[11,0,0,"-"],meta:[11,0,0,"-"],neural:[11,0,0,"-"],non_aggregative:[11,0,0,"-"]},"quapy.method.aggregative":{ACC:[11,1,1,""],AdjustedClassifyAndCount:[11,4,1,""],AggregativeProbabilisticQuantifier:[11,1,1,""],AggregativeQuantifier:[11,1,1,""],CC:[11,1,1,""],ClassifyAndCount:[11,4,1,""],ELM:[11,1,1,""],EMQ:[11,1,1,""],ExpectationMaximizationQuantifier:[11,4,1,""],ExplicitLossMinimisation:[11,4,1,""],HDy:[11,1,1,""],HellingerDistanceY:[11,4,1,""],MAX:[11,1,1,""],MS2:[11,1,1,""],MS:[11,1,1,""],MedianSweep2:[11,4,1,""],MedianSweep:[11,4,1,""],OneVsAll:[11,1,1,""],PACC:[11,1,1,""],PCC:[11,1,1,""],ProbabilisticAdjustedClassifyAndCount:[11,4,1,""],ProbabilisticClassifyAndCount:[11,4,1,""],SLD:[11,4,1,""],SVMAE:[11,1,1,""],SVMKLD:[11,1,1,""],SVMNKLD:[11,1,1,""],SVMQ:[11,1,1,""],SVMRAE:[11,1,1,""],T50:[11,1,1,""],ThresholdOptimizat
ion:[11,1,1,""],X:[11,1,1,""]},"quapy.method.aggregative.ACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""],solve_adjustment:[11,2,1,""]},"quapy.method.aggregative.AggregativeProbabilisticQuantifier":{posterior_probabilities:[11,2,1,""],predict_proba:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.AggregativeQuantifier":{aggregate:[11,2,1,""],aggregative:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],learner:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.CC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ELM":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.EMQ":{EM:[11,2,1,""],EPSILON:[11,4,1,""],MAX_ITER:[11,4,1,""],aggregate:[11,2,1,""],fit:[11,2,1,""],predict_proba:[11,2,1,""]},"quapy.method.aggregative.HDy":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.OneVsAll":{aggregate:[11,2,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],posterior_probabilities:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.PACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.PCC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ThresholdOptimization":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.base":{BaseQuantifier:[11,1,1,""],BinaryQuantifier:[11,1,1,""],isaggregative:[11,5,1,""],isbinary:[11,5,1,""],isprobabilistic:[11,5,1,""]},"quapy.method.base.BaseQuantifier":{aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],n_classes:[11,3,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.base.BinaryQuantifier":{binary:[11,3,1,""]},"quapy.method.meta":{EACC:[11,5,1,""],ECC:[11,5,1,""],EEMQ:[11
,5,1,""],EHDy:[11,5,1,""],EPACC:[11,5,1,""],Ensemble:[11,1,1,""],ensembleFactory:[11,5,1,""],get_probability_distribution:[11,5,1,""]},"quapy.method.meta.Ensemble":{VALID_POLICIES:[11,4,1,""],aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.neural":{QuaNetModule:[11,1,1,""],QuaNetTrainer:[11,1,1,""],mae_loss:[11,5,1,""]},"quapy.method.neural.QuaNetModule":{device:[11,3,1,""],forward:[11,2,1,""]},"quapy.method.neural.QuaNetTrainer":{classes_:[11,3,1,""],clean_checkpoint:[11,2,1,""],clean_checkpoint_dir:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.non_aggregative":{MaximumLikelihoodPrevalenceEstimation:[11,1,1,""]},"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation":{classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.model_selection":{GridSearchQ:[8,1,1,""]},"quapy.model_selection.GridSearchQ":{best_model:[8,2,1,""],classes_:[8,3,1,""],fit:[8,2,1,""],get_params:[8,2,1,""],quantify:[8,2,1,""],set_params:[8,2,1,""]},"quapy.plot":{binary_bias_bins:[8,5,1,""],binary_bias_global:[8,5,1,""],binary_diagonal:[8,5,1,""],brokenbar_supremacy_by_drift:[8,5,1,""],error_by_drift:[8,5,1,""]},"quapy.util":{EarlyStop:[8,1,1,""],create_if_not_exist:[8,5,1,""],create_parent_dir:[8,5,1,""],download_file:[8,5,1,""],download_file_if_not_exists:[8,5,1,""],get_quapy_home:[8,5,1,""],map_parallel:[8,5,1,""],parallel:[8,5,1,""],pickled_resource:[8,5,1,""],save_text_file:[8,5,1,""],temp_seed:[8,5,1,""]},quapy:{classification:[9,0,0,"-"],data:[10,0,0,"-"],error:[8,0,0,"-"],evaluation:[8,0,0,"-"],functional:[8,0,0,"-"],isbinary:[8,5,1,""],method:[11,0,0,"-"],model_selection:[8,0,0,"-"],plot:[8,0,0,"-"],util:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python 
class"],"2":["py","method","Python method"],"3":["py","property","Python property"],"4":["py","attribute","Python attribute"],"5":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:property","4":"py:attribute","5":"py:function"},terms:{"0":[0,1,3,4,5,8,9,10,11],"00":[0,1,4,8],"000":1,"0001":[4,11],"000e":1,"001":[4,9,11],"005":8,"008":[],"009":1,"0097":[],"01":[8,9,11],"017":1,"018":0,"02":1,"021":0,"02552":4,"03":1,"034":1,"035":1,"037":1,"04":1,"041":1,"042":1,"046":1,"048":1,"05":[5,8,10],"055":1,"063":[0,10],"065":0,"070":1,"073":1,"075":1,"078":0,"081":[0,10],"082":[0,1],"083":0,"086":0,"091":1,"099":0,"1":[0,1,3,4,5,8,9,10,11],"10":[0,1,4,5,8,9,11],"100":[0,1,3,4,5,9,10,11],"1000":[0,4,11],"10000":4,"100000":4,"1007":[],"101":[4,8,10],"1010":4,"1024":11,"104":0,"108":1,"109":0,"11":[0,1,6,8,10],"11338":0,"114":1,"1145":[],"12":9,"120":0,"1215742":0,"1271":0,"13":[0,9],"139":0,"14":3,"142":1,"146":3,"1473":0,"148":0,"1484":0,"15":[3,8,10],"150":0,"153":0,"157":0,"158":0,"159":0,"1593":0,"1594":0,"1599":0,"161":0,"163":[0,1],"164":[0,3],"167":0,"17":0,"1771":1,"1775":[0,3],"1778":[0,3],"178":0,"1823":0,"1839":0,"18399":0,"1853":0,"19":[3,10],"193":0,"199151":0,"19982":4,"1e":9,"1st":0,"2":[0,1,3,5,8,10,11],"20":[5,8,11],"200":[1,9],"2000":0,"2002":3,"2006":11,"2008":11,"2011":4,"2013":3,"2015":[0,2,3,9,11],"2016":[3,10,11],"2017":[0,3,10,11],"2018":[0,3,10],"2019":[3,10,11],"2020":4,"2021":11,"20342":4,"206":0,"207":0,"208":0,"21":[1,3,5,8,10],"210":[],"211":0,"2126":0,"2155":0,"21591":[0,10],"218":3,"2184":0,"219e":1,"22":[0,3,9,10],"222":0,"222046":0,"226":0,"229":1,"229399":0,"23":9,"235":1,"238":0,"2390":0,"24":[0,9],"243":0,"248563":0,"24866":4,"24987":4,"25":[0,5,8,9,11],"25000":0,"256":[0,9],"26":9,"261":0,"265":0,"266":0,"267":0,"27":[1,3,9],"270":0,"2700406":[],"271":0,"272":0,"274":0,"275":1,"27th":[0,3,10],"28":3,"280":0,"281":0,"282":0,"283":[0,1],"288":0,"289":0,"2971":0,"2nd":0,"2t":[1,8],"2tp":8,"
2x5fcv":0,"3":[0,1,3,5,6,8,9,10,11],"30":[0,1,3,11],"300":[0,1,9],"305":0,"306":0,"312":0,"32":[0,6],"3227":8,"3269206":[],"3269287":[],"33":[0,5,8],"331":0,"333":0,"335":0,"337":0,"34":[0,3,10,11],"341":0,"346":1,"347":0,"350":0,"351":0,"357":1,"359":0,"361":0,"366":1,"372":0,"373":0,"376132":0,"3765":0,"3813":0,"3821":[0,10],"383e":1,"387e":1,"392":0,"394":0,"399":0,"3f":[1,6],"3rd":0,"4":[0,1,3,4,5,8,11],"40":[0,3,4,11],"404333":0,"407":0,"41":3,"412":0,"412e":1,"413":0,"414":0,"417":0,"41734":4,"42":[1,8],"421":0,"4259":0,"426e":1,"427":0,"430":0,"434":0,"435":1,"43676":4,"437":0,"44":0,"4403":10,"446":0,"45":[3,5,10],"452":0,"459":1,"4601":0,"461":0,"463":0,"465":0,"466":0,"470":0,"48":3,"481":0,"48135":4,"486":0,"4898":0,"492":0,"496":0,"4960":1,"497":0,"5":[0,1,3,4,5,8,9,10,11],"50":[0,5,8,11],"500":[0,1,4,5,11],"5000":[1,5],"5005":4,"507":0,"508":0,"512":[9,11],"514":0,"515e":1,"530":0,"534":0,"535":0,"535e":1,"5379":4,"539":0,"541":1,"546":0,"5473":0,"54it":4,"55":5,"55it":4,"565":1,"569":0,"57":0,"573":0,"578":1,"583":0,"591":3,"5f":4,"5fcv":[],"5fcvx2":10,"6":[0,1,3,5,8,10],"60":0,"600":1,"601":0,"604":3,"606":0,"625":0,"627":0,"633e":1,"634":1,"64":[9,11],"640":0,"641":0,"650":0,"653":0,"654":1,"66":[1,11],"665":0,"667":0,"669":0,"67":[5,8],"683":0,"688":0,"691":0,"694582":0,"7":[1,5,8,9,11],"70":0,"700":0,"701e":1,"711":0,"717":1,"725":1,"730":0,"735":0,"740e":1,"748":0,"75":[0,5,8],"762":0,"774":0,"778":0,"787":0,"794":0,"798":0,"8":[0,1,5,10,11],"8000":0,"830":0,"837":1,"858":1,"861":0,"87":[0,3,10],"8788":0,"889504":0,"8d2fhsgcvn0aaaaa":[],"9":[0,1,3,5,8],"90":[5,8],"901":0,"909":1,"914":1,"917":0,"919":[0,10],"922":0,"923":0,"935":0,"936":0,"937":[0,10],"945":1,"95":[8,10],"9533":0,"958":0,"97":0,"979":0,"982":0,"99":8,"abstract":[3,9,10,11],"boolean":[8,10,11],"case":[0,1,3,4,5,8,10,11],"class":[0,1,3,4,5,6,8,9,10,11],"d\u00edez":3,"default":[1,3,8,9,10,11],"do":[0,1,3,4,8,9,10,11],"final":[1,3,5,11],"float":[0,3,8,9,10,11],"function":[0,1,3,4,5,6,
7,9,10,11],"g\u00e1llego":[0,3,10,11],"gonz\u00e1lez":3,"import":[0,1,3,4,5,6,10,11],"int":[0,5,8,10,11],"long":[4,9],"new":[0,3,10],"p\u00e9rez":[0,3,10,11],"return":[0,1,3,4,5,8,9,10,11],"rodr\u0131":3,"short":9,"static":[3,11],"true":[0,1,3,4,5,6,8,9,10,11],"try":4,"while":[3,5,8,9,10,11],A:[0,3,8,9,10,11],As:[3,4],By:[1,3,8],For:[0,1,5,6,8,10],If:[3,5,8,10,11],In:[0,1,2,3,4,5,6,9],It:[3,4,5,8],One:[0,1,3,11],That:[1,4],The:[0,1,2,4,5,6,8,9,10,11],Then:3,These:0,To:[5,10],_:[5,8,10],__:[],__class__:5,__name__:5,_adjust:[],_ae_:[],_classify_:[],_error_name_:[],_fit_learner_:[],_kld_:[],_labelledcollection_:[],_learner_:[],_mean:[],_min_df_:[],_my:[],_nkld_:[],_posterior_probabilities_:11,_q_:[],_rae_:[],_svmperf_:[],ab:[],aboud:3,about:[0,5,8,10],abov:[0,3,5,8],absolut:[1,3,5,6,8,11],absolute_error:8,abstractmethod:3,acc:[1,3,5,6,8,11],acc_error:8,accept:3,access:[0,3,10,11],accommod:0,accord:[1,3,4,8,9,10,11],accordingli:5,accuraci:[1,5,8,11],accuracy_polici:[],achiev:[1,3,4,5],acm:[0,3,10],across:[0,1,4,5,6,8],action:0,actual:[10,11],acut:0,ad:6,adapt:8,add:[3,4,8,10],add_word:10,addit:3,addition:0,adjust:[3,6,8,11],adjusted_quantif:8,adjustedclassifyandcount:11,adopt:[3,4,10],advanc:[0,6],advantag:[3,11],ae:[1,2,5,8,11],ae_:1,affect:8,after:[8,11],afterward:11,again:5,against:5,aggreg:[1,4,5,6,7,8],aggregativeprobabilisticquantifi:[3,11],aggregativequantifi:[3,11],aggregg:[],aim:[4,5],aka:[10,11],al:[0,2,9,10,11],alaiz:3,alegr:3,alejandro:4,algorithm:[8,11],alia:[3,8,11],all:[0,1,2,3,5,8,10,11],allia:3,alloc:[8,9],allow:[0,1,2,3,5,8,9,10,11],almost:3,along:[0,3,8,11],alreadi:[3,11],also:[0,1,2,3,5,6,8,9],altern:4,although:[3,4,5,11],alwai:[3,4,5,11],among:3,amount:8,an:[0,1,2,3,4,5,6,8,9,10,11],analys:[5,6],analysi:[0,3,6,10],analyz:5,ani:[0,1,3,4,5,6,8,9,10,11],anoth:[0,1,3,5],anotherdir:8,anyon:0,anyth:11,api:6,app:[8,10,11],appeal:1,appear:5,append:5,appli:[2,3,4,5,8,9,10,11],appropri:4,approxim:[1,5,8,9,10],ar:[0,1,3,4,5,8,9,10,11],archive_filenam:8,archive
_path:[],arg:[8,10],argmax:8,args_i:8,argu:4,argument:[0,1,3,5,8,10,11],arifici:[],aris:1,around:[1,10],arrai:[1,3,5,8,9,10,11],articl:[3,4],artifici:[0,1,3,4,5,6,8,10],artificial_prevalence_predict:8,artificial_prevalence_protocol:8,artificial_prevalence_report:8,artificial_prevalence_sampl:8,artificial_sampling_ev:[1,4],artificial_sampling_gener:[0,10],artificial_sampling_index_gener:10,artificial_sampling_predict:[1,5],artificial_sampling_report:1,arxiv:4,asarrai:1,asdataload:9,asonam:0,assert:10,assess:4,assign:[3,8,10],associ:[8,10],assum:[1,6,11],assumpion:11,assumpt:[1,5,6],astyp:[],attempt:[3,11],attribut:11,august:0,autom:[0,3,6],automat:[0,1],av:[3,11],avail:[0,1,2,3,5,6,9,11],averag:[1,3,8,10,11],avoid:[1,8],ax:11,axi:[5,8],b:[0,10,11],balanc:[0,4,11],band:[5,8],bar:8,barranquero:[2,3,9,11],base:[0,3,6,7,8,9],base_classifi:5,base_estim:3,base_quantifier_class:11,baseestim:[9,11],baselin:6,basequantifi:[3,8,11],basic:[5,11],batch:9,batch_siz:9,batch_size_test:9,beat:11,been:[0,3,4,5,8,10,11],befor:[3,8,9,10,11],beforehand:8,behav:[3,5],being:[4,8,11],belief:1,belong:[3,11],below:[0,2,3,5,8,10],best:[4,8,9],best_epoch:8,best_model:8,best_model_:4,best_params_:4,best_scor:8,better:4,between:[4,5,6,8,9,11],beyond:5,bia:[6,8],bias:5,bidirect:11,bin:[5,8,11],bin_bia:5,bin_diag:5,binar:[8,10],binari:[3,5,6,8,9,10,11],binary_bias_bin:[5,8],binary_bias_glob:[5,8],binary_diagon:[5,8],binary_quantifi:11,binaryquantifi:11,binom:8,block:[0,8],bool:8,both:5,bound:[8,11],box:[5,8],breast:0,brief:1,bring:11,broken:[5,8],brokenbar_supremacy_by_drift:8,budg:1,budget:[1,4],build:[],bypass:11,c:[3,4,8,9,10,11],calcul:8,calibr:3,calibratedclassifi:3,calibratedclassifiercv:3,calibratedcv:[],call:[0,1,5,8,10,11],callabl:[0,8,10],can:[0,1,2,3,4,5,8,10,11],cancer:0,cannot:[],cardiotocographi:0,care:11,carri:[3,10,11],casa_token:[],castano:[3,10],castro:3,categor:[3,10],categori:[1,8],cc:[3,5,11],ceil:8,cell:11,center:5,chang:[0,1,3,10],character:[3,6],characteriz:[0,3,10],charg:[
0,8,10],chart:8,check:[3,4],checkpoint:[9,11],checkpointdir:11,checkpointnam:11,checkpointpath:9,choic:4,choos:11,chosen:[4,8],cl:0,cla:[],class2int:10,class_weight:[4,11],classes_:[8,10,11],classif:[0,1,3,7,8,10,11],classif_posterior:[3,11],classif_predict:[3,11],classif_predictions_bin:11,classifi:[1,4,5,6,8,9,11],classifier_net:9,classifiermixin:9,classifyandcount:[3,11],classmethod:[0,10,11],classnam:10,classs:8,clean_checkpoint:11,clean_checkpoint_dir:11,clear:5,clearer:1,clearli:5,clip:8,close:[1,10],closer:1,closest:11,cm:8,cmc:0,cnn:[3,11],cnnnet:[3,9,11],code:[0,3,4,5,9],codifi:10,coincid:[0,6],col:[0,10],collect:[0,8,9,10],collet:10,color:[5,8],colormap:8,column:[0,8,10],com:8,combin:[0,1,4,8,10,11],combinatio:8,combinations_budget:8,come:[0,8,10,11],commandlin:[],common:11,commonli:6,compar:[5,8],comparison:5,compat:11,compil:[2,3],complement:11,complet:[3,5,11],compon:[8,9],compress:0,comput:[1,3,5,8,11],computation:4,compute_fpr:[],compute_t:[],compute_tpr:[],concept:6,concur:[],condit:[8,11],conduct:[0,8],confer:[0,3,10],confid:8,configur:[4,8],conform:10,connect:11,consecut:[8,9,11],consid:[3,5,8,9,10,11],consist:[0,4,5,8,9,10,11],constrain:[1,5,8,10],constructor:3,consult:[0,1],contain:[1,2,3,5,8,9,10,11],contanin:8,content:7,context:8,contrast:1,control:[1,4,10],conv_block:[],conv_lay:[],conveni:8,converg:11,convert:[1,3,8,9,10,11],convolut:9,copi:[8,10],cornel:[],correct:11,correctli:8,correspond:[5,8,10],cosest:11,cost:1,costli:4,could:[0,1,3,4,5,6],count:[4,5,6,8,10,11],count_:[],counter:10,countvector:10,covari:10,cover:[1,4,9],coz:[0,3,10],cpu:[1,9,11],creat:[0,6,8],create_if_not_exist:8,create_parent_dir:8,crisp:[3,8],criteria:4,cross:[3,10,11],cs:8,csr:10,csr_matrix:10,csv:10,ctg:0,cuda:[3,9,11],cumbersom:1,cumberson:8,cumul:11,curios:5,current:[3,8,9,10,11],custom:[3,6,8,10],customarili:[3,4],cv:[3,4],cyan:5,d:11,d_:8,dat:[0,9],data:[1,3,4,5,6,7,8,9,11],data_hom:10,datafram:[1,8],dataload:9,dataset:[1,3,4,5,6,7,8,9,11],dataset_nam:10,deal:0,
decaesteck:[3,11],decai:9,decid:10,decim:1,decis:[3,8,9,11],decision_funct:9,decomposit:9,dedic:[1,10],deep:[3,8,11],def:[0,1,3,5,8],defin:[0,3,8,9,10,11],degre:4,del:[0,3,10],delai:8,deliv:[3,11],denomin:11,dens:[0,11],densiti:8,depend:[0,1,4,5,8,11],describ:[3,8,11],descript:0,design:4,desir:[0,1,10],despit:1,destin:8,detail:[0,1,3,6,9,10,11],determin:[1,4,5],detriment:5,devel:10,develop:[4,6],deviat:[0,1,5,8,10],devic:[0,3,5,9,11],df:1,df_replac:[],diabet:0,diagon:[6,8],dict:[8,10,11],dictionari:[8,9,10,11],differ:[0,1,3,4,5,6,8,10,11],difficult:5,digit:0,dimens:[8,9,10,11],dimension:[8,9,10,11],dir:8,directli:[0,1,3],directori:[2,8,9,10,11],discard:8,discoveri:3,discret:8,discuss:5,disjoint:9,disk:8,displai:[1,5,8],displaystyl:8,distanc:[8,11],distant:[1,8],distribut:[0,3,5,8,10,11],diverg:[1,3,8,11],divid:8,dl:[],doabl:0,doc_embed:11,doc_embedding_s:11,doc_posterior:11,document:[0,1,3,5,9,10,11],document_embed:9,doe:[0,2,3,8,11],doi:[],done:3,dot:[5,8],dowload:8,down:[5,8,10],download:[0,2,3,8],download_fil:8,download_file_if_not_exist:8,draw:[8,10],drawn:[0,1,4,8,10],drift:6,drop:9,drop_p:9,dropout:[9,11],ds:[3,11],ds_polici:[],ds_policy_get_posterior:[],dtype:[1,10],dump:10,dure:[1,5,11],dynam:[3,9,10,11],e:[0,1,3,4,5,6,8,9,10,11],eacc:11,each:[0,1,3,4,5,8,9,10,11],earli:[8,9,11],early_stop:[],earlystop:8,easili:[0,2,5,9],ecc:11,edu:[],eemq:11,effect:3,effici:3,ehdi:11,either:[1,3,8,10,11],element:[3,10,11],elm:[3,11],els:11,em:11,emb:9,embed:[3,9,11],embed_s:9,embedding_s:9,empti:10,emq:[5,11],enabl:9,encod:10,end:[4,8,11],endeavour:6,enough:5,ensembl:[0,6,10,11],ensemblefactori:11,ensure_probabilist:[],entir:[0,3,4,5,8],entri:11,environ:[1,3,4,5,8,11],ep:[1,8],epacc:11,epoch:[8,9,11],epsilon:[1,8,11],equal:[1,8],equidist:[0,8],equip:[3,5],equival:11,err:[],err_drift:5,err_nam:8,error:[3,4,6,7,9,11],error_:[],error_by_drift:[5,8],error_funct:1,error_metr:[1,4,8],error_nam:[5,8],especi:8,establish:8,estim:[1,3,5,6,8,9,10,11],estim_prev:[1,5,8],estim_preval:[3
,6,11],estimant:11,esuli:[0,2,3,9,10,11],et:[0,2,9,10,11],etc:6,eval_budget:[4,8],evalu:[0,3,4,5,6,7,9,10,11],even:8,eventu:[9,10],everi:[3,11],everyth:3,evinc:5,ex:[],exact:[0,10],exactli:0,exampl:[0,1,3,4,5,8,9,10,11],exce:8,excel:0,except:[3,8,11],exemplifi:0,exhaust:8,exhibit:[4,5],exist:8,exist_ok:8,expand_frame_repr:1,expect:[6,11],expectationmaximizationquantifi:[3,11],experi:[1,2,3,4,5,8],explain:[1,5],explicit:11,explicitlossminim:[],explicitlossminimis:11,explor:[4,8,10],express:10,ext:2,extend:[2,3,11],extens:[0,2,5],extern:3,extract:[1,8,10],f1:[1,8,9],f1_error:8,f1e:[1,8],f:[0,1,3,4,5,6,10],f_1:8,fabrizio:4,facilit:6,fact:[3,5],factor:8,factori:11,fals:[1,3,5,8,9,10,11],famili:[3,11],familiar:3,far:[8,9,10],fare:8,fast:8,faster:[0,10],feat1:10,feat2:10,featn:10,featur:[0,10],feature_extract:10,fetch:[0,6],fetch_review:[0,1,3,4,5,10,11],fetch_twitt:[0,3,6,10],fetch_ucidataset:[0,3,10],fetch_ucilabelledcollect:[0,10],ff:11,ff_layer:11,fhe:0,file:[0,5,8,9,10,11],filenam:8,fin:0,find:[0,4],finish:4,first:[0,1,2,3,5,8,10,11],fit:[1,3,4,5,6,8,9,10,11],fit_learn:[3,11],fit_transform:10,fix:[1,4],flag:8,float64:1,fn:8,fold:[3,10,11],folder:[0,11],follow:[0,1,3,4,5,6,8,11],fomart:10,for_model_select:[0,10],form:[0,8,10],forman:11,format:[0,5,10],former:[2,11],forward:[9,11],found:[0,3,4,8,9,10],four:3,fp:8,fpr:[8,11],frac:8,framework:6,frequenc:[0,10,11],from:[0,1,3,4,5,6,8,10,11],from_csv:10,from_nam:[1,8],from_spars:10,from_text:10,full:[1,8],fulli:0,func:8,further:[0,1,3,9,10,11],fusion:[0,3,10],futur:3,g:[0,1,3,4,6,8,10,11],gain:8,gao:[0,3,10,11],gap:10,gasp:[0,10],gen:8,gen_data:5,gen_fn:8,gen_prevalence_predict:8,gen_prevalence_report:8,gener:[0,1,3,4,5,8,9,10,11],generation_func:8,german:0,get:[0,1,5,8,9,10,11],get_aggregative_estim:[],get_nprevpoints_approxim:[1,8],get_param:[3,8,9,11],get_probability_distribut:11,get_quapy_hom:8,ggener:8,github:[],give:11,given:[1,3,4,8,9,10,11],global:8,goal:11,goe:4,good:[4,5],got:4,govern:1,gpu:[9,11],grant:[],greate
r:10,grid:[4,8,10,11],gridsearchcv:[4,11],gridsearchq:[4,8,11],ground:11,group:3,guarante:10,guez:3,gzip:0,ha:[3,4,5,8,9,10,11],haberman:[0,3],had:10,handl:0,happen:[4,5],hard:3,harder:5,harmon:8,harri:0,hat:8,have:[0,1,2,3,4,5,8,10,11],hcr:[0,3,10],hd:8,hdy:[6,11],held:[3,4,8,9,11],helling:11,hellingerdist:8,hellingerdistancei:[3,11],hellingh:8,help:5,henc:[8,10],here:[1,11],heurist:11,hidden:[5,9,11],hidden_s:9,hide:5,high:[5,8],higher:[1,5],highlight:8,hightlight:8,histogram:11,hlt:[],hold:[6,8,11],home:[8,10],hook:11,how:[0,1,3,4,5,8,10,11],howev:[0,4,5],hp:[0,3,4,10],html:10,http:[8,10],hyper:[4,8,9],hyperparam:4,hyperparamet:[3,8],i:[0,1,3,4,5,8,9,10,11],id:[0,3,10],identifi:8,idf:0,ieee:0,ignor:[8,10,11],ii:8,iid:[1,5,6],illustr:[3,4,5],imdb:[0,5,10],implement:[0,1,3,4,5,6,8,9,10,11],implicit:8,impos:[4,8],improv:[3,8,9,11],includ:[0,1,3,5,6,10,11],inconveni:8,inde:[3,4],independ:[8,11],index:[0,3,6,8,9,10,11],indextransform:10,indic:[0,1,3,4,5,8,10,11],individu:[1,3],infer:[0,10],inform:[0,1,3,4,8,10,11],infrequ:10,inherit:3,init:3,init_hidden:[],initi:[0,9],inplac:[1,3,10,11],input:[3,5,8,9,11],insight:5,inspir:3,instal:[0,3,6,9,11],instanc:[0,3,4,5,6,8,9,10,11],instanti:[0,1,3,4,9,11],instead:[1,3,4,11],integ:[3,8,9,10,11],integr:6,interest:[1,5,6,8,10],interestingli:5,interfac:[0,1,11],intern:[0,3,10],interpret:[5,6,11],interv:[1,5,8,10],introduc:1,invok:[0,1,3,8,10],involv:[2,5,8],io:[],ionospher:0,iri:0,irrespect:[5,11],isaggreg:11,isbinari:[8,10,11],isomer:8,isometr:[5,8],isprobabilist:11,isti:[],item:8,iter:[0,8,11],its:[3,4,8,9,11],itself:[3,8,11],j:[0,3,10,11],joachim:[3,9,11],job:[2,8],joblib:2,join:8,just:[1,3],k:[3,6,8,10,11],keep:8,kei:[8,10],kept:10,kernel:9,kernel_height:9,keyword:[10,11],kfcv:[0,10,11],kindl:[0,1,3,5,10,11],kl:8,kld:[1,2,8,9,11],know:3,knowledg:[0,3,10],known:[0,3,4,11],kraemer:8,kullback:[1,3,8,11],kwarg:[9,10,11],l1:[8,11],l:11,label:[0,3,4,5,6,8,9,10,11],labelledcollect:[0,3,4,8,10,11],larg:4,larger:[10,11],largest:8,last:
[1,3,5,8,9,10],lastli:3,latex:5,latinn:[3,11],latter:11,layer:[3,9,11],lazi:11,lead:[1,10],learn:[1,2,3,4,6,8,9,10,11],learner:[3,4,9,11],least:[0,10],leav:10,left:10,legend:8,leibler:[1,3,8,11],len:8,length:[9,10],less:[8,10],let:[1,3],level:[],leverag:3,leyend:8,like:[0,1,3,5,8,9,10,11],likelihood:11,limit:[5,8,10,11],line:[1,3,8],linear:[5,11],linear_model:[1,3,4,6,9],linearsvc:[3,5,10],link:[],linspac:5,list:[0,5,8,9,10,11],listedcolormap:8,literatur:[0,1,4,6],load:[0,3,8,10,11],loader:[0,10],loader_func:[0,10],loader_kwarg:10,local:8,log:[8,10],logist:[1,3,9,11],logisticregress:[1,3,4,6,9,11],logscal:8,logspac:[4,11],longer:8,longest:9,look:[0,1,3,5,11],loop:11,loss:[6,9,11],low:[5,8,9],lower:[5,8,11],lower_is_bett:8,lowest:5,lowranklogisticregress:9,lr:[1,3,9,11],lstm:[3,9,11],lstm_class_nlay:9,lstm_hidden_s:11,lstm_nlayer:11,lstmnet:9,m:[3,8,11],machin:[1,4,6],macro:8,made:[0,2,8,10,11],mae:[1,4,6,8,9,11],mae_loss:11,mai:8,main:5,maintain:[3,11],make:[0,1,3,11],makedir:8,mammograph:0,manag:[0,3,10],mani:[1,3,4,5,6,8,10,11],manner:0,manual:0,map:[1,9],map_parallel:8,margin:9,mass:8,math:[],mathcal:8,matplotlib:[2,8],matric:[0,5,10],matrix:[5,8,11],max:11,max_it:11,max_sample_s:11,maxim:[6,11],maximum:[1,8,9,11],maximumlikelihoodprevalenceestim:11,md:[],mean:[0,1,3,4,5,6,8,9,10,11],mean_absolute_error:8,mean_relative_absolute_error:8,measur:[2,3,4,5,6,8,11],median:11,mediansweep2:11,mediansweep:11,member:[3,11],memori:9,mention:3,merg:5,met:10,meta:[6,7,8],meth:[],method:[0,1,4,5,6,7,8],method_data:5,method_nam:[5,8],method_ord:8,metric:[1,3,4,6,8,11],might:[1,8,10],min_df:[1,3,4,5,10,11],min_po:11,mine:[0,3],minim:[8,11],minimum:[10,11],minimun:10,mining6:10,minu:8,misclassif:11,miss:8,mixtur:[3,11],mkld:[1,8,11],ml:10,mlpe:11,mnkld:[1,8,11],mock:[8,9],modal:4,model:[0,1,5,6,8,9,11],model_select:[4,7,11],modifi:[3,8],modul:[0,1,3,5,6,7],moment:[0,3],monitor:8,more:[3,5,8,11],moreo:[0,3,4,10,11],most:[0,3,5,6,8,10,11],movi:0,mrae:[1,6,8,9,11],ms2:11,ms:11,mse:[
1,3,6,8,11],msg:[],multiclass:8,multipli:8,multiprocess:8,multivari:[3,9],must:[3,10,11],mutual:11,my:[],my_arrai:8,my_collect:10,my_custom_load:0,my_data:0,mycustomloss:3,n:[0,1,8,9,11],n_bin:[5,8],n_class:[1,3,8,9,10,11],n_classes_:11,n_compon:9,n_dimens:9,n_epoch:11,n_featur:9,n_instanc:[8,9,11],n_job:[1,3,4,8,10,11],n_preval:[0,8,10],n_prevpoint:[1,4,5,8],n_repeat:[1,8],n_repetit:[1,4,5,8],n_sampl:[8,9],name:[5,8,9,10,11],nativ:6,natur:[1,8,10,11],natural_prevalence_predict:8,natural_prevalence_protocol:8,natural_prevalence_report:8,natural_sampling_gener:10,natural_sampling_index_gener:10,nbin:[5,8],ndarrai:[1,3,8,10,11],necessarili:[],need:[0,3,8,10,11],neg:[0,5,8,11],nest:[],net:9,network:[0,8,9,10,11],neural:[0,7,8,10],neuralclassifiertrain:[3,9,11],neutral:0,next:[4,8,9,10],nfold:[0,10],nkld:[1,2,6,8,9,11],nn:[9,11],nogap:10,non:3,non_aggreg:[7,8],none:[1,4,8,9,10,11],nonetheless:4,nor:3,normal:[0,1,3,8,10,11],normalize_preval:8,note:[1,3,4,5,8,10],noth:11,now:5,nowadai:3,np:[1,3,4,5,8,10,11],npp:[8,10],nprevpoint:[],nrepeat:[0,10],num_prevalence_combin:[1,8],number:[0,1,3,5,8,9,10,11],numer:[0,1,3,6,10,11],numpi:[2,4,8,9,11],o_l6x_pcf09mdetq4tu7jk98mxfbgsxp9zso14jkuiyudgfg0:[],object:[0,8,9,10,11],observ:1,obtain:[1,4,8,11],obtaind:8,obvious:8,occur:[5,10],occurr:10,octob:[0,3],off:9,offer:[3,6],older:2,omd:[0,10],ommit:[1,8],onc:[1,3,5,8],one:[0,1,3,4,5,8,10,11],ones:[1,3,5,8,10],onevsal:[3,11],onli:[0,3,5,8,9,10,11],open:[0,6,10],oper:3,opt:4,optim:[2,3,4,8,9,11],optimize_threshold:[],option:[0,1,3,5,8,10,11],order:[0,2,3,5,8,10,11],order_bi:11,org:10,orient:[3,6,8,11],origin:[0,3,10],os:[0,8],other:[1,3,5,6,8,10,11],otherwis:[0,3,8,10,11],our:[],out:[3,4,5,8,9,10,11],outcom:5,outer:8,outlier:8,output:[0,1,3,4,8,9,10,11],outsid:11,over:[3,4,8],overal:1,overestim:5,overrid:3,overridden:[3,11],own:4,p:[0,3,8,10,11],p_hat:8,p_i:8,pacc:[1,3,5,8,11],packag:[0,2,3,6,7],pad:[9,10],pad_length:9,padding_length:9,page:[0,2,6],pageblock:0,pair:[0,8,11],panda:[1,2,8
],paper:[0,3],parallel:[1,3,8,10,11],param:[4,9,11],param_grid:[4,8,11],param_mod_sel:11,param_model_sel:11,paramet:[1,3,4,8,9,10,11],parent:8,part:[3,10],particular:[0,1,3],particularli:1,pass:[0,1,5,8,9,11],past:1,patch:[2,3,9,11],path:[0,3,5,8,9,10,11],patienc:[8,9,11],pattern:3,pca:[],pcalr:[],pcc:[3,4,5,11],pd:1,pdf:5,peopl:[],percentil:8,perf:[6,9,11],perform:[1,3,4,5,6,8,9,11],perman:8,phase:11,phonem:0,pick:4,pickl:[3,8,10,11],pickle_path:8,pickled_resourc:8,pii:[],pip:2,pipelin:[],pkl:8,plai:0,plan:3,pleas:3,plot:[6,7],png:5,point:[0,1,3,8,10],polici:[3,11],popular:6,portion:4,pos_class:[8,10],posit:[0,3,5,8,10,11],possibl:[1,3,8],post:8,posterior:[3,8,9,11],posterior_prob:[3,11],postpon:3,potter:0,pp:[0,3],pprox:[],practic:[0,4],pre:[0,3],prec:[0,8],preced:10,precis:[0,1,8],preclassifi:3,predefin:10,predict:[3,4,5,8,9,11],predict_proba:[3,9,11],predictor:1,prefer:8,preliminari:11,prepare_svmperf:[2,3],preprint:4,preprocess:[0,1,3,7,8,11],present:[0,3,10],preserv:[1,5,8,10],pretti:5,prev:[0,1,8,10],prevail:3,preval:[0,1,3,4,5,6,8,10,11],prevalence_estim:8,prevalence_from_label:8,prevalence_from_prob:8,prevalence_linspac:8,prevel:11,previou:3,previous:[],prevs_estim:11,prevs_hat:[1,8],princip:9,print:[0,1,3,4,6,9,10],prior:[1,3,4,5,6,8,11],priori:3,probabilist:[3,11],probabilisticadjustedclassifyandcount:11,probabilisticclassifyandcount:11,probabl:[1,3,4,5,6,8,9,11],problem:[0,3,5,8,10,11],procedur:[3,6],proceed:[0,3,10],process:[3,4,8],processor:3,procol:1,produc:[0,1,5,8],product:3,progress:[8,10],properli:0,properti:[3,8,9,10,11],proport:[3,4,8,9,10,11],propos:[2,3,11],protocl:8,protocol:[0,3,4,5,6,8,10,11],provid:[0,3,5,6,11],ptecondestim:11,ptr:[3,11],ptr_polici:[],purpos:[0,11],put:11,python:[0,6],pytorch:[2,11],q:[0,2,3,8,9,11],q_i:8,qacc:9,qdrop_p:11,qf1:9,qgm:9,qp:[0,1,3,4,5,6,8,10,11],quanet:[2,6,9,11],quanetmodul:11,quanettrain:11,quantif:[0,1,6,8,9,10,11],quantifi:[3,4,5,6,8,11],quantification_error:8,quantiti:8,quapi:[0,1,2,3,4,5],quapy_data:0,q
uay_data:10,question:8,quevedo:[0,3,10],quick:[],quit:8,r:[0,3,8,10],rac:[],rae:[1,2,8,11],rais:[3,8,11],rand:8,random:[1,3,4,5,8,10],random_se:[1,8],random_st:10,randomli:0,rang:[0,5,8,11],rank:[3,9],rare:10,rate:[3,8,9,11],rather:[1,4],raw:10,rb:0,re:[3,4,10],reach:11,read:10,reader:[7,8],readm:[],real:[8,9,10,11],reason:[3,5,6],recal:8,receiv:[0,3,5],recip:11,recognit:3,recommend:[1,5,11],recomput:11,recurr:[0,3,10],recurs:11,red:0,red_siz:[3,11],reduc:[0,10],reduce_column:[0,10],refer:[9,10],refit:[4,8],regard:4,regardless:10,regim:8,region:8,regist:11,regress:9,regressor:[1,3],reindex_label:10,reiniti:9,rel:[1,3,8,10,11],relative_absolute_error:8,reli:[1,3,11],reliabl:3,rememb:5,remov:[10,11],repeat:[8,10],repetit:8,repl:[],replac:[0,3,10],replic:[1,4,8],report:[1,8],repositori:[0,10],repr_siz:9,repres:[1,3,5,8,10,11],represent:[0,3,8,9,11],reproduc:10,request:[0,8,10],requir:[0,1,3,6,9],reset_net_param:9,resourc:8,resp:11,respect:[0,1,5,8,11],respond:3,rest:[8,10,11],result:[1,2,3,4,5,6,8,11],retain:[0,3,9,11],retrain:4,return_constrained_dim:8,reus:[0,3,8],review:[5,6,10],reviews_sentiment_dataset:[0,10],rewrit:5,right:[4,8,10],role:0,root:6,roughli:0,round:10,routin:[8,10,11],row:[8,10],run:[0,1,2,3,4,5,8,10,11],s003132031400291x:[],s10618:[],s:[0,1,3,4,5,8,9,10,11],saeren:[3,11],sai:[],said:3,same:[0,3,5,8,10,11],sampl:[0,1,3,4,5,6,8,9,10,11],sample_s:[0,1,3,4,5,8,10,11],sampling_from_index:[0,10],sampling_index:[0,10],sander:[0,10],save:[5,8],save_or_show:[],save_text_fil:8,savepath:[5,8],scale:8,scall:10,scenario:[1,3,4,5,6],scienc:3,sciencedirect:[],scikit:[2,3,4,10],scipi:[2,10],score:[0,1,4,8,9,10],script:[1,2,3,6,11],se:[1,8],search:[3,4,6,8],sebastiani:[0,3,4,10,11],second:[0,1,3,5,8,10],secondari:8,section:4,see:[0,1,2,3,4,5,6,8,9,10,11],seed:[1,4,8],seem:3,seemingli:5,seen:[5,8,11],select:[0,3,6,8,10,11],selector:3,self:[3,8,9,10,11],semeion:0,semev:0,semeval13:[0,10],semeval14:[0,10],semeval15:[0,10],semeval16:[0,6,10],sentenc:10,sentiment:[3,6,10
],separ:[8,10],sequenc:8,seri:0,serv:3,set:[0,1,3,4,5,6,8,9,10,11],set_opt:1,set_param:[3,8,9,11],set_siz:[],sever:0,sh:[2,3],shape:[5,8,9,10,11],share:[0,10],shift:[1,4,6,8,11],shorter:9,shoud:3,should:[0,1,3,4,5,6,9,10,11],show:[0,1,3,4,5,8,9,10,11],show_dens:8,show_std:[5,8],showcas:5,shown:[1,5,8],shuffl:[9,10],side:8,sign:8,signific:1,significantli:8,silent:[8,11],simeq:[],similar:[8,11],simpl:[0,3,5,11],simplest:3,simplex:[0,8],simpli:[1,2,3,4,5,6,8,11],sinc:[0,1,3,5,8,10,11],singl:[1,3,6,11],size:[0,1,3,8,9,10,11],sklearn:[1,3,4,5,6,9,10,11],sld:[3,11],slice:8,smooth:[1,8],smooth_limits_epsilon:8,so:[0,1,3,5,8,9,10,11],social:[0,3,10],soft:3,softwar:0,solid:5,solut:8,solv:[4,11],solve_adjust:11,some:[0,1,3,5,8,10,11],some_arrai:8,sometim:1,sonar:0,sort:11,sourc:[2,3,6,9],sout:[],space:[0,4,8,9],spambas:0,spars:[0,10],special:[0,5,10],specif:[3,4],specifi:[0,1,3,5,8,9,10],spectf:0,spectrum:[0,1,4,5,8],speed:[3,11],split:[0,3,4,5,8,9,10,11],split_stratifi:10,splitstratifi:10,spmatrix:10,springer:[],sqrt:8,squar:[1,3,8],sst:[0,10],stabil:[1,11],stabl:10,stackexchang:8,stand:[8,11],standard:[0,1,5,8,10,11],star:8,start:4,stat:10,state:8,statist:[0,1,8,11],stats_siz:11,std:9,stdout:8,step:[5,8],stop:[8,9,11],store:[0,9,10,11],str:[0,8,10],strategi:[3,4],stratif:10,stratifi:[0,3,10,11],stride:9,string:[1,8,10,11],strongli:[4,5],strprev:[0,1,8],structur:[3,11],studi:[0,3,10],style:10,subclass:11,subdir:8,subinterv:5,sublinear_tf:10,submit:0,submodul:7,subobject:[],suboptim:4,subpackag:7,subsequ:10,subtract:[0,8,10],subtyp:10,suffic:5,suffici:[],sum:[8,11],sum_:8,summar:0,supervis:[4,6],support:[3,6,9,10],surfac:10,surpass:1,svm:[3,5,6,9,10,11],svm_light:[],svm_perf:[],svm_perf_classifi:9,svm_perf_learn:9,svm_perf_quantif:[2,3],svmae:[3,11],svmkld:[3,11],svmnkld:[3,11],svmperf:[2,3,7,8,11],svmperf_bas:[9,11],svmperf_hom:3,svmq:[3,11],svmrae:[3,11],sweep:11,syntax:5,system:[4,11],t50:11,t:[0,1,3,8],tab10:8,tail:8,tail_density_threshold:8,take:[0,3,5,8,10,11],taken:[3,
8,9,10],target:[3,5,6,8,9,11],task:[3,4,10],te:[8,10],temp_se:8,tempor:8,tend:5,tendenc:5,tensor:9,term:[0,1,3,4,5,6,8,9,10,11],test:[0,1,3,4,5,6,8,9,10,11],test_bas:[],test_dataset:[],test_method:[],test_path:[0,10],test_sampl:8,test_split:10,text2tfidf:[0,1,3,10],text:[0,3,8,9,10,11],textclassifiernet:9,textual:[0,6,10],tf:[0,10],tfidf:[0,4,5,10],tfidfvector:10,than:[1,4,5,8,9,10],thei:[0,3,11],them:[0,3,11],theoret:4,thereaft:1,therefor:[8,10],thi:[0,1,2,3,4,5,6,8,9,10,11],thing:3,third:[1,5],thorsten:9,those:[1,3,4,5,8,9,11],though:[3,8],three:[0,5],threshold:[8,11],thresholdoptim:11,through:[3,8],thu:[3,4,5,8,11],tictacto:0,time:[0,1,3,8,10],timeout:8,timeouterror:8,timer:8,titl:8,tj:[],tn:8,token:[0,9,10],tool:[1,6],top:[3,8,11],torch:[3,9,11],torchdataset:9,total:8,toward:[5,10],tp:8,tpr:[8,11],tqdm:2,tr:10,tr_iter_per_poch:11,tr_prev:[5,8,11],track:8,trade:9,tradition:1,train:[0,1,3,4,5,6,8,9,10,11],train_path:[0,10],train_prev:[5,8],train_prop:10,train_siz:10,train_val_split:[],trainer:9,training_help:[],training_preval:5,training_s:5,transact:3,transform:[0,9,10,11],transfus:0,trivial:3,true_prev:[1,5,8],true_preval:6,truncatedsvd:9,truth:11,ttest_alpha:8,tupl:[8,10,11],turn:4,tweet:[0,3,10],twitter:[6,10],twitter_sentiment_datasets_test:[0,10],twitter_sentiment_datasets_train:[0,10],two:[0,1,3,4,5,8,10,11],txt:8,type:[0,3,8,10,11],typic:[1,4,5,8,9,10,11],u1:10,uci:[6,10],uci_dataset:10,unabl:0,unadjust:5,unalt:9,unbias:5,uncompress:0,under:1,underestim:5,underlin:8,understand:8,unfortun:5,unifi:[0,11],uniform:[8,10],uniform_prevalence_sampl:8,uniform_sampl:10,uniform_sampling_index:10,uniform_simplex_sampl:8,uniformli:[8,10],union:[8,11],uniqu:10,unit:[0,8],unix:0,unk:10,unknown:10,unlabel:11,unless:11,unlik:[1,4],until:11,unus:[8,9],up:[3,4,8,9,11],updat:11,url:8,us:[0,1,3,4,5,6,8,9,10,11],user:[0,1,5],utf:10,util:[7,9],v:3,va_iter_per_poch:11,val:[0,10],val_split:[3,4,8,9,11],valid:[0,1,3,4,5,8,9,10,11],valid_loss:[3,9,11],valid_polici:11,valu:[0,1,3,8,
9,10,11],variabl:[1,3,5,8,10],varianc:[0,5],variant:[5,6,11],varieti:4,variou:[1,5],vector:[0,8,9,10],verbos:[0,1,4,8,9,10,11],veri:[3,5],versatil:6,version:[2,9,11],vertic:8,vertical_xtick:8,via:[0,2,3,11],view:5,visual:[5,6],vline:8,vocab_s:9,vocabulari:[9,10],vocabulary_s:[3,9,10,11],vs:[3,8],w:[0,3,10],wa:[0,3,5,8,10,11],wai:[1,11],wait:9,want:[3,4],warn:10,wb:[0,10],wdbc:0,we:[0,1,3,4,5,6],weight:[9,10],weight_decai:9,well:[0,3,4,5,11],were:0,what:3,whcih:10,when:[0,1,3,4,5,8,9,10],whenev:[5,8],where:[3,5,8,9,10,11],wherebi:4,whether:[8,9,10,11],which:[0,1,3,4,5,8,9,10,11],white:0,whole:[0,1,3,4,8],whose:[10,11],why:3,wide:5,wiki:[0,3],wine:0,within:[8,11],without:[1,3,8,10],word:[1,3,6,9,10,11],work:[1,3,4,5,10],worker:[1,8,10,11],wors:[4,5,8],would:[0,1,3,5,6,8,10,11],wrapper:[8,9,10,11],written:6,www:[],x2:10,x:[5,8,9,10,11],x_error:8,xavier:9,xavier_uniform:9,xlrd:[0,2],xy:10,y:[5,8,9,10,11],y_:[],y_error:8,y_i:11,y_j:11,y_pred:8,y_true:8,ye:[],yeast:[0,10],yield:[5,8,10,11],yin:[],you:[2,3],your:3,z:[0,10],zero:[0,8],zfthyovrzwxmgfzylqw_y8cagg:[],zip:[0,5]},titles:["Datasets","Evaluation","Installation","Quantification Methods","Model Selection","Plotting","Welcome to QuaPy\u2019s documentation!","quapy","quapy package","quapy.classification package","quapy.data package","quapy.method 
package"],titleterms:{"function":8,A:6,The:3,ad:0,aggreg:[3,11],base:[10,11],bia:5,classif:[4,9],classifi:3,content:[6,8,9,10,11],count:3,custom:0,data:[0,10],dataset:[0,10],diagon:5,distanc:3,document:6,drift:5,emq:3,ensembl:3,error:[1,5,8],evalu:[1,8],ex:[],exampl:6,expect:3,explicit:3,featur:6,get:[],hdy:3,helling:3,indic:6,instal:2,introduct:6,issu:0,learn:0,loss:[2,3,4],machin:0,maxim:3,measur:1,meta:[3,11],method:[3,9,11],minim:3,model:[3,4],model_select:8,modul:[8,9,10,11],network:3,neural:[3,9,11],non_aggreg:11,orient:[2,4],packag:[8,9,10,11],perf:2,plot:[5,8],preprocess:10,process:0,protocol:1,quanet:3,quantif:[2,3,4,5],quapi:[6,7,8,9,10,11],quick:6,reader:10,readm:[],requir:2,review:0,s:6,select:4,sentiment:0,start:[],submodul:[8,9,10,11],subpackag:8,svm:2,svmperf:9,tabl:6,target:4,test:[],test_bas:[],test_dataset:[],test_method:[],titl:[],twitter:0,uci:0,util:8,variant:3,welcom:6,y:3}}) \ No newline at end of file +Search.setIndex({"docnames": ["Datasets", "Evaluation", "Installation", "Methods", "Model-Selection", "Plotting", "index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["Datasets.md", "Evaluation.md", "Installation.rst", "Methods.md", "Model-Selection.md", "Plotting.md", "index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Datasets", "Evaluation", "Installation", "Quantification Methods", "Model Selection", "Plotting", "Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"quapi": [0, 1, 2, 3, 4, 5], "make": [0, 1, 3, 8, 11], "avail": [0, 1, 2, 3, 5, 6, 9, 11], "sever": 0, "have": [0, 1, 2, 3, 4, 5, 8, 10, 11], "been": [0, 3, 4, 5, 8, 9, 10, 11], "us": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "quantif": [0, 1, 6, 8, 9, 10, 11], "literatur": [0, 1, 4, 6], "well": [0, 3, 4, 5, 11], "an": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "interfac": [0, 1, 
11], "allow": [0, 1, 2, 3, 5, 8, 9, 10, 11], "anyon": 0, "import": [0, 1, 3, 4, 5, 6, 10, 11], "A": [0, 3, 8, 9, 10, 11], "object": [0, 8, 9, 10, 11], "i": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "roughli": 0, "pair": [0, 8], "labelledcollect": [0, 3, 4, 8, 10, 11], "one": [0, 1, 3, 4, 5, 8, 10, 11], "plai": 0, "role": 0, "train": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "set": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "anoth": [0, 1, 3, 5], "test": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "class": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "consist": [0, 4, 5, 8, 9, 10, 11], "iter": [0, 8, 11], "instanc": [0, 3, 4, 5, 6, 8, 9, 10, 11], "label": [0, 3, 4, 5, 6, 8, 9, 10, 11], "thi": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "handl": 0, "most": [0, 3, 5, 6, 8, 10, 11], "sampl": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "function": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11], "take": [0, 3, 5, 8, 10, 11], "look": [0, 1, 3, 5, 11], "follow": [0, 1, 3, 4, 5, 6, 8, 11], "code": [0, 3, 4, 5, 9], "qp": [0, 1, 3, 4, 5, 6, 8, 10, 11], "f": [0, 1, 3, 4, 5, 6, 10], "1st": 0, "posit": [0, 3, 5, 8, 10, 11], "document": [0, 1, 3, 5, 9, 10, 11], "2nd": 0, "onli": [0, 3, 5, 8, 9, 10, 11], "neg": [0, 5, 8, 11], "neutral": 0, "3rd": 0, "2": [0, 1, 3, 5, 8, 10, 11], "0": [0, 1, 3, 4, 5, 8, 9, 10, 11], "1": [0, 1, 3, 4, 5, 8, 9, 10, 11], "print": [0, 1, 3, 4, 6, 9, 10], "strprev": [0, 1, 8], "preval": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "prec": [0, 8], "output": [0, 1, 3, 4, 9, 10, 11], "show": [0, 1, 3, 4, 5, 8, 9, 10, 11], "digit": 0, "precis": [0, 1, 8], "17": 0, "50": [0, 5, 8, 11], "33": [0, 5, 8], "One": [0, 1, 3, 11], "can": [0, 1, 2, 3, 4, 5, 8, 10, 11], "easili": [0, 2, 5, 9], "produc": [0, 1, 5, 8], "new": [0, 3, 8, 9, 10], "desir": [0, 1, 10], "sample_s": [0, 1, 3, 4, 5, 8, 11], "10": [0, 1, 4, 5, 8, 9, 11], "prev": [0, 1, 8, 10], "4": [0, 1, 3, 4, 5, 10, 11], "5": [0, 1, 3, 4, 5, 8, 9, 10, 11], "which": [0, 1, 3, 4, 5, 8, 9, 10, 11], "40": [0, 3, 4, 11], "made": [0, 2, 8, 10, 11], "across": [0, 1, 4, 5, 6, 8, 11], "differ": [0, 1, 
3, 4, 5, 6, 8, 10, 11], "run": [0, 1, 2, 3, 4, 5, 8, 10, 11], "e": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "g": [0, 1, 3, 4, 6, 8, 10, 11], "method": [0, 1, 4, 5, 6, 8], "same": [0, 3, 5, 8, 10, 11], "exact": [0, 10], "retain": [0, 3, 9, 11], "index": [0, 3, 6, 8, 9, 10, 11], "gener": [0, 1, 3, 4, 5, 8, 9, 10, 11], "sampling_index": [0, 10], "sampling_from_index": [0, 10], "also": [0, 1, 2, 3, 5, 6, 8, 9], "implement": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "artifici": [0, 1, 3, 4, 5, 6, 8], "protocol": [0, 3, 4, 5, 6, 7, 10, 11], "via": [0, 2, 3, 8, 9, 11], "python": [0, 6], "": [0, 1, 3, 4, 5, 8, 9, 10, 11], "seri": [0, 10], "equidist": [0, 8], "rang": [0, 5, 8, 11], "entir": [0, 3, 4, 5, 8], "spectrum": [0, 1, 4, 5, 8], "simplex": [0, 8], "space": [0, 4, 8, 9], "artificial_sampling_gener": 0, "100": [0, 1, 3, 4, 5, 8, 9, 10, 11], "n_preval": [0, 8], "each": [0, 1, 3, 4, 5, 8, 9, 10, 11], "valid": [0, 1, 3, 4, 5, 8, 9, 10, 11], "combin": [0, 1, 4, 8, 11], "origin": [0, 3, 8, 10], "from": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "split": [0, 3, 4, 5, 8, 9, 10, 11], "point": [0, 1, 3, 8, 10], "25": [0, 5, 8, 9, 11], "75": [0, 5, 8], "00": [0, 1, 4], "see": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "evalu": [0, 3, 4, 5, 6, 7, 9, 10, 11], "wiki": [0, 3], "further": [0, 1, 3, 9, 10, 11], "detail": [0, 1, 3, 6, 9, 10, 11], "how": [0, 1, 3, 4, 5, 8, 10, 11], "properli": 0, "three": [0, 5], "about": [0, 5, 8, 10], "kindl": [0, 1, 3, 5, 10, 11], "devic": [0, 3, 5, 9, 11], "harri": 0, "potter": 0, "known": [0, 3, 4, 8, 11], "imdb": [0, 5, 10], "movi": 0, "fetch": [0, 6], "unifi": [0, 11], "For": [0, 1, 5, 6, 8, 10], "exampl": [0, 1, 3, 4, 5, 8, 9, 10, 11], "fetch_review": [0, 1, 3, 4, 5, 10, 11], "These": [0, 9], "esuli": [0, 2, 3, 9, 10, 11], "moreo": [0, 3, 4, 10, 11], "sebastiani": [0, 3, 4, 10, 11], "2018": [0, 3, 10], "octob": [0, 3], "recurr": [0, 3, 10], "neural": [0, 8, 10], "network": [0, 8, 9, 10, 11], "In": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "proceed": [0, 3, 10], "27th": [0, 3, 
10], "acm": [0, 3, 10, 11], "intern": [0, 3, 9, 10], "confer": [0, 3, 9, 10], "inform": [0, 1, 3, 4, 8, 9, 10, 11], "knowledg": [0, 3, 10], "manag": [0, 3, 10], "pp": [0, 3, 9], "1775": [0, 3], "1778": [0, 3], "The": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11], "list": [0, 5, 8, 9, 10, 11], "id": [0, 3, 10], "reviews_sentiment_dataset": [0, 10], "some": [0, 1, 3, 5, 8, 10, 11], "statist": [0, 1, 8, 11], "fhe": 0, "ar": [0, 1, 3, 4, 5, 8, 9, 10, 11], "summar": 0, "below": [0, 2, 3, 5, 8, 10], "size": [0, 1, 3, 8, 9, 10, 11], "type": [0, 3, 8, 10, 11], "hp": [0, 3, 4, 10], "9533": 0, "18399": 0, "018": 0, "982": 0, "065": 0, "935": 0, "text": [0, 3, 8, 9, 10, 11], "3821": [0, 10], "21591": [0, 10], "081": [0, 10], "919": [0, 10], "063": [0, 10], "937": [0, 10], "25000": 0, "500": [0, 1, 4, 5, 11], "11": [0, 1, 6, 8], "analysi": [0, 3, 6, 10], "access": [0, 3, 10, 11], "were": 0, "tf": [0, 10], "idf": 0, "format": [0, 5, 10, 11], "present": [0, 3, 10], "two": [0, 1, 3, 4, 5, 8, 10, 11], "val": [0, 9, 10], "model": [0, 1, 5, 6, 8, 9, 11], "select": [0, 3, 6, 8, 10, 11], "purpos": [0, 11], "exemplifi": 0, "load": [0, 3, 8, 10, 11], "fetch_twitt": [0, 3, 6, 10], "gasp": [0, 10], "for_model_select": [0, 10], "true": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "gao": [0, 3, 10, 11], "w": [0, 3, 10], "2015": [0, 2, 3, 9, 11], "august": 0, "tweet": [0, 3, 10], "classif": [0, 1, 3, 6, 8, 10, 11], "ieee": 0, "advanc": [0, 6], "social": [0, 3, 10], "mine": [0, 3], "asonam": 0, "97": 0, "104": 0, "semeval13": [0, 10], "semeval14": [0, 10], "semeval15": [0, 10], "share": [0, 10], "semev": 0, "mean": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "would": [0, 1, 3, 5, 6, 10, 11], "get": [0, 1, 5, 8, 9, 10, 11], "when": [0, 1, 3, 4, 5, 8, 9, 10], "request": [0, 8, 10, 11], "ani": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "them": [0, 3, 11], "consult": [0, 1], "twitter_sentiment_datasets_test": [0, 10], "9": [0, 1, 3, 5, 8], "replac": [0, 3, 10], "twitter_sentiment_datasets_train": [0, 10], "found": [0, 3, 4, 8, 9, 10], 
"featur": [0, 10], "3": [0, 1, 3, 5, 6, 8, 9, 10, 11], "8788": 0, "3765": 0, "694582": 0, "421": 0, "496": 0, "082": [0, 1], "407": 0, "507": 0, "086": 0, "spars": [0, 10], "hcr": [0, 3, 10], "1594": 0, "798": 0, "222046": 0, "546": 0, "211": 0, "243": 0, "640": 0, "167": 0, "193": 0, "omd": [0, 10], "1839": 0, "787": 0, "199151": 0, "463": 0, "271": 0, "266": 0, "437": 0, "283": [0, 1], "280": 0, "sander": [0, 10], "2155": 0, "923": 0, "229399": 0, "161": 0, "691": 0, "148": 0, "164": [0, 3], "688": 0, "11338": 0, "3813": 0, "1215742": 0, "159": 0, "470": 0, "372": 0, "158": 0, "430": 0, "412": 0, "1853": 0, "109": 0, "361": 0, "530": 0, "2390": 0, "153": 0, "413": 0, "434": 0, "semeval16": [0, 6, 10], "8000": 0, "2000": 0, "889504": 0, "157": 0, "351": 0, "492": 0, "163": [0, 1], "341": 0, "497": 0, "sst": [0, 10], "2971": 0, "1271": 0, "376132": 0, "261": 0, "452": 0, "288": 0, "207": 0, "481": 0, "312": 0, "wa": [0, 3, 5, 8, 10, 11], "2184": 0, "936": 0, "248563": 0, "305": 0, "414": 0, "281": 0, "282": 0, "446": 0, "272": 0, "wb": [0, 10], "4259": 0, "1823": 0, "404333": 0, "270": 0, "392": 0, "337": 0, "274": 0, "335": 0, "32": [0, 6], "repositori": [0, 10], "p\u00e9rez": [0, 3, 10, 11], "g\u00e1llego": [0, 3, 10, 11], "p": [0, 3, 8, 9, 10, 11], "quevedo": [0, 3, 10], "j": [0, 3, 10, 11], "r": [0, 3, 8, 10], "del": [0, 3, 10], "coz": [0, 3, 10], "2017": [0, 3, 10, 11], "ensembl": [0, 6, 10, 11], "problem": [0, 3, 5, 8, 10, 11], "characteriz": [0, 3, 10], "chang": [0, 1, 3, 10], "distribut": [0, 3, 5, 8, 10, 11], "case": [0, 1, 3, 4, 5, 8, 9, 10, 11], "studi": [0, 3, 10], "fusion": [0, 3, 10], "34": [0, 3, 10, 11], "87": [0, 3, 10], "doe": [0, 2, 3, 8, 11], "exactli": 0, "coincid": [0, 6], "et": [0, 2, 9, 10, 11], "al": [0, 2, 9, 10, 11], "sinc": [0, 1, 3, 5, 10, 11], "we": [0, 1, 3, 4, 5, 6, 10], "unabl": 0, "find": [0, 4, 11], "diabet": 0, "phonem": 0, "call": [0, 1, 5, 8, 10, 11], "fetch_ucidataset": [0, 3, 10], "yeast": [0, 10], "verbos": [0, 1, 4, 8, 9, 
10, 11], "return": [0, 1, 3, 4, 5, 8, 9, 10, 11], "randomli": [0, 10], "drawn": [0, 1, 4, 8, 10], "stratifi": [0, 3, 9, 10, 11], "manner": [0, 9, 11], "whole": [0, 1, 3, 4, 8, 9], "collect": [0, 8, 9, 10], "70": 0, "30": [0, 1, 3, 11], "respect": [0, 1, 5, 8, 11], "option": [0, 1, 3, 5, 10, 11], "indic": [0, 1, 3, 4, 5, 8, 9, 10, 11], "descript": [0, 10], "should": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "standard": [0, 1, 5, 8, 9, 10, 11], "paper": [0, 3, 9, 11], "submit": 0, "kfcv": [0, 9, 10, 11], "order": [0, 2, 3, 5, 8, 10, 11], "accommod": 0, "practic": [0, 4], "could": [0, 1, 3, 4, 5, 6], "first": [0, 1, 2, 3, 5, 8, 10, 11], "instanti": [0, 1, 3, 4, 8, 9, 11], "creat": [0, 6, 8, 11], "time": [0, 1, 3, 8, 10], "fetch_ucilabelledcollect": [0, 10], "nfold": [0, 8, 10], "nrepeat": [0, 10], "abov": [0, 3, 5, 8], "conduct": [0, 8], "2x5fcv": 0, "all": [0, 1, 2, 3, 5, 8, 9, 11], "come": [0, 8, 10, 11], "numer": [0, 1, 3, 6, 10, 11], "form": [0, 8, 10, 11], "dens": [0, 11], "matric": [0, 5, 10], "acut": 0, "120": 0, "6": [0, 1, 3, 5, 10], "508": 0, "b": [0, 8, 10, 11], "583": 0, "417": 0, "balanc": [0, 4, 11], "625": 0, "539": 0, "461": 0, "922": 0, "078": 0, "breast": 0, "cancer": 0, "683": 0, "350": 0, "650": 0, "cmc": 0, "1473": 0, "573": 0, "427": 0, "774": 0, "226": 0, "653": 0, "347": 0, "ctg": 0, "2126": 0, "22": [0, 3, 9, 10], "222": [0, 9], "778": 0, "861": 0, "139": 0, "917": 0, "083": 0, "german": 0, "1000": [0, 4, 11], "24": [0, 9], "300": [0, 1, 9], "700": 0, "haberman": [0, 3], "306": 0, "735": 0, "265": 0, "ionospher": 0, "641": 0, "359": 0, "iri": 0, "150": 0, "667": 0, "333": 0, "mammograph": 0, "830": 0, "514": 0, "486": 0, "pageblock": 0, "5473": 0, "979": 0, "021": 0, "semeion": 0, "1593": 0, "256": [0, 9], "901": 0, "099": 0, "sonar": 0, "208": 0, "60": 0, "534": 0, "466": 0, "spambas": 0, "4601": 0, "57": 0, "606": 0, "394": 0, "spectf": 0, "267": 0, "44": 0, "794": 0, "206": 0, "tictacto": 0, "958": 0, "transfus": 0, "748": 0, "762": 0, "238": 0, 
"wdbc": 0, "569": 0, "627": 0, "373": 0, "wine": 0, "178": 0, "13": [0, 9], "669": 0, "331": 0, "601": 0, "399": 0, "730": 0, "q": [0, 2, 3, 8, 9, 11], "red": 0, "1599": 0, "465": 0, "535": 0, "white": 0, "4898": 0, "665": 0, "1484": 0, "8": [0, 1, 5, 10, 11], "711": 0, "289": 0, "download": [0, 2, 3, 8, 10], "automat": [0, 1], "thei": [0, 3, 11], "store": [0, 9, 10, 11], "quapy_data": [0, 8], "folder": [0, 10, 11], "faster": [0, 10], "reus": [0, 3, 8, 10], "howev": [0, 4, 5], "requir": [0, 1, 3, 6, 9], "special": [0, 5, 10], "action": 0, "moment": [0, 3], "fulli": [0, 8], "autom": [0, 3, 6], "cardiotocographi": 0, "excel": 0, "file": [0, 5, 8, 9, 10, 11], "user": [0, 1, 5], "instal": [0, 3, 6, 9, 11], "xlrd": [0, 2], "modul": [0, 1, 3, 5, 6, 7], "open": [0, 6, 10], "page": [0, 2, 6], "block": [0, 8], "need": [0, 3, 8, 10, 11], "unix": 0, "compress": 0, "extens": [0, 2, 5], "z": [0, 10], "directli": [0, 1, 3], "doabl": 0, "packag": [0, 2, 3, 6, 7], "like": [0, 1, 3, 5, 8, 9, 10, 11], "gzip": 0, "zip": [0, 5], "uncompress": 0, "o": [0, 8], "depend": [0, 1, 4, 5, 8, 11], "softwar": 0, "manual": 0, "do": [0, 1, 3, 4, 8, 9, 10, 11], "invok": [0, 1, 3, 8, 10], "provid": [0, 3, 5, 6, 10, 11], "loader": [0, 10], "simpl": [0, 3, 5, 11], "deal": 0, "t": [0, 1, 3, 8, 9, 11], "pre": [0, 3], "n": [0, 1, 8, 9, 11], "second": [0, 1, 3, 5, 8, 10], "represent": [0, 3, 8, 9, 11], "col": [0, 10], "int": [0, 5, 8, 10, 11], "float": [0, 3, 8, 9, 10, 11], "charg": [0, 10], "classmethod": [0, 8, 10, 11], "def": [0, 1, 3, 5, 8], "cl": 0, "path": [0, 3, 5, 8, 9, 10, 11], "str": [0, 8, 10, 11], "loader_func": [0, 10], "callabl": [0, 8, 10, 11], "defin": [0, 3, 8, 9, 10, 11], "argument": [0, 1, 3, 5, 8, 10, 11], "initi": [0, 9, 11], "particular": [0, 1, 3, 11], "receiv": [0, 3, 5], "addition": 0, "number": [0, 1, 3, 5, 8, 9, 10, 11], "specifi": [0, 1, 3, 5, 8, 9, 10], "otherwis": [0, 3, 8, 10], "infer": [0, 10], "least": [0, 10], "pass": [0, 1, 5, 8, 9, 11], "along": [0, 3, 8, 11], 
"train_path": [0, 10], "my_data": 0, "dat": [0, 9], "test_path": [0, 10], "my_custom_load": 0, "rb": 0, "fin": 0, "preprocess": [0, 1, 3, 8, 11], "includ": [0, 1, 3, 5, 6, 10, 11], "text2tfidf": [0, 1, 3, 10], "tfidf": [0, 4, 5, 10], "vector": [0, 8, 9, 10, 11], "reduce_column": [0, 10], "reduc": [0, 10], "column": [0, 10], "base": [0, 3, 6, 8, 9], "term": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "frequenc": [0, 10, 11], "transform": [0, 9, 10, 11], "valu": [0, 1, 3, 8, 9, 10, 11], "score": [0, 1, 4, 8, 9, 10], "subtract": [0, 8, 10], "normal": [0, 1, 3, 8, 10, 11], "deviat": [0, 1, 5, 8, 10], "so": [0, 1, 3, 5, 8, 9, 10, 11], "zero": [0, 8], "unit": [0, 8], "varianc": [0, 5], "textual": [0, 6, 10], "token": [0, 9, 10], "appeal": 1, "tool": [1, 6], "scenario": [1, 3, 4, 5, 6], "dataset": [1, 3, 4, 5, 6, 8, 9, 11], "shift": [1, 4, 6, 8, 9, 11], "particularli": 1, "prior": [1, 3, 4, 5, 6, 8, 11], "probabl": [1, 3, 4, 5, 6, 8, 9, 11], "That": [1, 4], "interest": [1, 5, 6, 8], "estim": [1, 3, 5, 6, 8, 9, 10, 11], "aris": 1, "under": 1, "belief": 1, "those": [1, 3, 4, 5, 8, 9, 11], "might": [1, 8, 10], "ones": [1, 3, 5, 8, 10, 11], "observ": [1, 11], "dure": [1, 5, 11], "other": [1, 3, 5, 6, 8, 10, 11], "word": [1, 3, 6, 9, 10, 11], "simpli": [1, 2, 3, 4, 5, 6, 8, 11], "predictor": 1, "assum": [1, 6, 11], "unlik": [1, 4, 8], "machin": [1, 4, 6, 9], "learn": [1, 2, 3, 4, 6, 8, 9, 10, 11], "govern": 1, "iid": [1, 5, 6], "assumpt": [1, 5, 6], "brief": [1, 10], "dedic": [1, 10], "explain": [1, 5], "here": [1, 11], "mae": [1, 4, 6, 8, 9, 11], "absolut": [1, 3, 5, 6, 8, 11], "mrae": [1, 6, 8, 9, 11], "rel": [1, 3, 8, 10, 11], "mse": [1, 3, 6, 8, 11], "squar": [1, 3, 8], "mkld": [1, 8, 11], "kullback": [1, 3, 8, 11], "leibler": [1, 3, 8, 11], "diverg": [1, 3, 8, 11], "mnkld": [1, 8, 11], "ae": [1, 2, 5, 8, 11], "rae": [1, 2, 8, 11], "se": [1, 8], "kld": [1, 2, 8, 9, 11], "nkld": [1, 2, 6, 8, 9, 11], "individu": [1, 3], "without": [1, 3, 8, 10], "averag": [1, 3, 8, 10, 11], "acc": [1, 
3, 5, 6, 8, 11], "accuraci": [1, 5, 8, 11], "f1e": [1, 8], "f1": [1, 8, 9], "true_prev": [1, 5, 8], "prevs_hat": [1, 8], "ndarrai": [1, 3, 8, 10, 11], "contain": [1, 2, 3, 5, 8, 9, 10, 11], "smooth": [1, 8], "stabil": [1, 11], "third": [1, 5], "ep": [1, 8], "none": [1, 4, 8, 9, 10, 11], "paramet": [1, 3, 4, 8, 9, 10, 11], "epsilon": [1, 8, 11], "tradition": 1, "2t": [1, 8], "past": 1, "either": [1, 3, 8, 11], "environ": [1, 3, 4, 5, 8, 11], "variabl": [1, 3, 5, 8, 10], "onc": [1, 3, 5, 8, 10], "ommit": 1, "thereaft": 1, "recommend": [1, 5, 11], "np": [1, 3, 4, 5, 8, 10, 11], "asarrai": 1, "let": [1, 3, 11], "estim_prev": [1, 5, 8], "ae_": 1, "3f": [1, 6], "200": [1, 9], "600": 1, "914": 1, "final": [1, 3, 5, 11], "possibl": [1, 3, 8, 11], "string": [1, 8, 10, 11], "error_funct": 1, "from_nam": [1, 8], "accord": [1, 3, 4, 8, 9, 10, 11], "fix": [1, 4], "cover": [1, 4, 8, 9], "full": [1, 8], "contrast": 1, "natur": [1, 8], "despit": 1, "introduc": 1, "approxim": [1, 5, 8, 9], "preserv": [1, 5, 8], "procol": 1, "equal": [1, 8, 11], "distant": [1, 8], "interv": [1, 5, 8], "n_prevpoint": [1, 4, 5, 8], "determin": [1, 4, 5, 8], "constrain": [1, 5, 8, 10], "obtain": [1, 4, 8, 9, 11], "66": [1, 11], "given": [1, 3, 4, 8, 9, 10, 11], "num_prevalence_combin": [1, 8], "21": [1, 3, 5, 8], "n_class": [1, 3, 8, 9, 10, 11], "n_repeat": [1, 8], "1771": 1, "note": [1, 3, 4, 5, 8, 10], "last": [1, 3, 5, 8, 9, 10], "typic": [1, 4, 5, 8, 9, 10, 11], "singl": [1, 3, 6, 11], "higher": [1, 5], "comput": [1, 3, 5, 8, 11], "perform": [1, 3, 4, 5, 6, 8, 9, 11], "signific": 1, "instead": [1, 3, 4, 8, 10, 11], "work": [1, 3, 4, 5, 8, 10, 11], "wai": [1, 11], "around": [1, 10], "maximum": [1, 8, 9, 11], "budg": 1, "close": [1, 10], "than": [1, 4, 5, 8, 9, 10], "budget": [1, 4], "achiev": [1, 3, 4, 5], "get_nprevpoints_approxim": [1, 8], "5000": [1, 5], "4960": 1, "cost": 1, "sometim": 1, "cumbersom": 1, "control": [1, 4, 8], "overal": 1, "experi": [1, 2, 3, 4, 5, 8], "rather": [1, 4], "By": [1, 
3, 8], "avoid": [1, 8], "lead": [1, 10], "closer": 1, "surpass": 1, "script": [1, 2, 3, 6, 11], "pacc": [1, 3, 5, 8, 11], "reli": [1, 3, 8, 11], "logist": [1, 3, 9, 11], "regressor": [1, 3], "classifi": [1, 4, 5, 6, 8, 9, 11], "variou": [1, 5], "metric": [1, 3, 4, 6, 8, 11], "sklearn": [1, 3, 4, 5, 6, 9, 10, 11], "linear_model": [1, 3, 4, 6, 9], "logisticregress": [1, 3, 4, 6, 9, 11], "data": [1, 3, 4, 5, 6, 8, 9, 11], "min_df": [1, 3, 4, 5, 10, 11], "inplac": [1, 3, 10, 11], "lr": [1, 3, 9, 11], "aggreg": [1, 4, 5, 6, 8], "fit": [1, 3, 4, 5, 6, 8, 9, 10, 11], "df": 1, "artificial_sampling_report": 1, "mani": [1, 3, 4, 5, 6, 8, 10, 11], "extract": [1, 8, 10], "categori": [1, 8], "n_repetit": [1, 4, 5], "n_job": [1, 3, 4, 8, 9, 10, 11], "parallel": [1, 3, 8, 9, 10, 11], "worker": [1, 8, 9, 10, 11], "cpu": [1, 9, 11], "random_se": [1, 8], "42": 1, "random": [1, 3, 4, 5, 8, 10], "seed": [1, 4, 8], "replic": [1, 4, 8], "error_metr": [1, 4, 8], "line": [1, 3, 8], "result": [1, 2, 3, 4, 5, 6, 11], "report": 1, "panda": [1, 2], "datafram": 1, "displai": [1, 5, 8, 9], "just": [1, 3], "clearer": 1, "shown": [1, 5, 8], "convert": [1, 3, 8, 9, 10, 11], "repres": [1, 3, 5, 8, 10, 11], "decim": 1, "default": [1, 3, 8, 9, 10, 11], "pd": 1, "set_opt": 1, "expand_frame_repr": 1, "fals": [1, 3, 5, 8, 9, 10, 11], "map": [1, 9, 11], "000": 1, "000e": 1, "091": 1, "909": 1, "009": 1, "048": 1, "426e": 1, "04": 1, "837": 1, "037": 1, "114": 1, "633e": 1, "03": 1, "7": [1, 5, 8, 9, 11], "717": 1, "017": 1, "041": 1, "383e": 1, "366": 1, "634": 1, "034": 1, "070": 1, "412e": 1, "459": 1, "541": 1, "387e": 1, "565": 1, "435": 1, "035": 1, "073": 1, "535e": 1, "654": 1, "346": 1, "046": 1, "108": 1, "701e": 1, "725": 1, "275": 1, "075": 1, "235": 1, "515e": 1, "02": 1, "858": 1, "142": 1, "042": 1, "229": 1, "740e": 1, "945": 1, "055": 1, "27": [1, 3, 9], "357": 1, "219e": 1, "578": 1, "dtype": [1, 10], "float64": 1, "artificial_sampling_ev": [1, 4], "artificial_sampling_predict": [1, 5], 
"arrai": [1, 3, 5, 8, 9, 10, 11], "pip": 2, "older": 2, "version": [2, 8, 9, 11], "scikit": [2, 3, 4, 8, 9, 10, 11], "numpi": [2, 4, 8, 9], "scipi": [2, 10], "pytorch": [2, 11], "quanet": [2, 6, 9, 11], "svmperf": [2, 3, 8, 11], "patch": [2, 3, 9, 11], "joblib": 2, "tqdm": 2, "matplotlib": [2, 8], "involv": [2, 5, 8], "you": [2, 3], "appli": [2, 3, 4, 5, 8, 9, 10, 11], "ext": 2, "compil": [2, 3], "sourc": [2, 3, 6, 9], "prepare_svmperf": [2, 3], "sh": [2, 3], "job": 2, "directori": [2, 8, 9, 10, 11], "svm_perf_quantif": [2, 3], "optim": [2, 3, 4, 8, 9, 11], "measur": [2, 3, 4, 5, 6, 8, 11], "propos": [2, 3, 11], "barranquero": [2, 3, 9, 11], "extend": [2, 3, 8, 11], "former": [2, 11], "categor": [3, 10], "belong": [3, 11], "non": [3, 11], "group": 3, "though": [3, 8], "plan": 3, "add": [3, 4, 8, 10], "more": [3, 5, 11], "futur": 3, "character": [3, 6], "fact": [3, 5], "product": [3, 10], "quantifi": [3, 4, 5, 6, 8, 10, 11], "shoud": 3, "basequantifi": [3, 8, 11], "abstract": [3, 8, 9, 10, 11], "abstractmethod": 3, "self": [3, 8, 9, 10, 11], "set_param": [3, 8, 9, 11], "get_param": [3, 8, 9, 11], "deep": [3, 8, 11], "familiar": 3, "structur": [3, 11], "inspir": 3, "reason": [3, 5, 6], "why": 3, "ha": [3, 4, 5, 8, 9, 10, 11], "adopt": [3, 4, 10], "respond": 3, "predict": [3, 4, 5, 8, 9, 11], "input": [3, 5, 8, 9, 11], "element": [3, 10, 11], "while": [3, 5, 9, 10, 11], "selector": 3, "process": [3, 4, 8], "hyperparamet": [3, 8, 11], "search": [3, 4, 6, 8, 11], "part": [3, 10], "aggregativequantifi": [3, 11], "must": [3, 10, 11], "fit_learn": 3, "classif_predict": [3, 11], "mention": 3, "befor": [3, 8, 9, 10, 11], "inde": [3, 4], "alreadi": [3, 8, 11], "preclassifi": 3, "maintain": [3, 11], "through": [3, 8], "properti": [3, 8, 9, 10, 11], "learner": [3, 4, 9, 11], "extern": 3, "probabilist": [3, 9, 11], "inherit": 3, "aggregativeprobabilisticquantifi": [3, 11], "posterior": [3, 8, 9, 11], "crisp": [3, 8, 11], "decis": [3, 8, 9, 11], "hard": [3, 9], 
"classif_posterior": [3, 11], "posterior_prob": [3, 11], "advantag": [3, 11], "procedur": [3, 6, 8], "veri": [3, 5], "effici": 3, "everi": [3, 8, 11], "leverag": 3, "speed": [3, 11], "up": [3, 4, 8, 9, 11], "over": [3, 4, 8], "customarili": [3, 4], "done": 3, "four": 3, "cc": [3, 5, 11], "simplest": 3, "deliv": [3, 11], "adjust": [3, 6, 8, 11], "pcc": [3, 4, 5, 11], "soft": 3, "serv": [3, 8, 10], "complet": [3, 5, 11], "equip": [3, 5], "svm": [3, 5, 6, 9, 10, 11], "linearsvc": [3, 5, 10], "pickl": [3, 8, 10, 11], "alia": [3, 8, 10, 11], "classifyandcount": [3, 11], "estim_preval": [3, 6, 11], "rate": [3, 8, 9, 11], "binari": [3, 5, 6, 8, 9, 10, 11], "init": 3, "addit": 3, "val_split": [3, 4, 9, 11], "integ": [3, 8, 9, 10, 11], "k": [3, 6, 8, 9, 10, 11], "fold": [3, 8, 10, 11], "cross": [3, 8, 9, 10, 11], "specif": [3, 4, 8], "held": [3, 4, 8, 9, 11], "out": [3, 4, 5, 8, 9, 10, 11], "postpon": 3, "constructor": 3, "prevail": 3, "overrid": 3, "illustr": [3, 4, 5], "seem": 3, "calibr": [3, 8], "calibratedclassifiercv": 3, "base_estim": 3, "cv": [3, 4], "predict_proba": [3, 9, 11], "As": [3, 4], "calibratedclassifi": 3, "except": [3, 8, 11], "rais": [3, 8, 11], "lastli": 3, "everyth": 3, "said": 3, "aboud": 3, "sld": [3, 11], "expectationmaximizationquantifi": [3, 11], "describ": [3, 8, 11], "saeren": [3, 11], "m": [3, 8, 11], "latinn": [3, 11], "decaesteck": [3, 11], "c": [3, 4, 8, 9, 10, 11], "2002": 3, "priori": 3, "14": 3, "41": 3, "attempt": [3, 11], "although": [3, 4, 5, 11], "improv": [3, 8, 9, 11], "rank": [3, 9], "almost": 3, "alwai": [3, 4, 5, 11], "among": 3, "effect": 3, "carri": [3, 10, 11], "gonz\u00e1lez": 3, "castro": 3, "v": [3, 8, 9, 11], "alaiz": 3, "rodr\u0131": 3, "guez": 3, "alegr": 3, "2013": 3, "scienc": 3, "218": 3, "146": 3, "It": [3, 4, 5, 8], "allia": 3, "hellingerdistancei": [3, 11], "mixtur": [3, 8, 11], "previou": 3, "overridden": [3, 11], "proport": [3, 4, 9, 10, 11], "taken": [3, 8, 9, 10], "itself": [3, 8, 11], "accept": 3, "elm": [3, 
11], "famili": [3, 11], "target": [3, 5, 6, 8, 9, 11], "orient": [3, 6, 8, 11], "joachim": [3, 9, 11], "svmq": [3, 11], "d\u00edez": 3, "reliabl": 3, "pattern": 3, "recognit": 3, "48": 3, "591": 3, "604": 3, "svmkld": [3, 11], "multivari": [3, 9], "transact": 3, "discoveri": 3, "articl": [3, 4], "svmnkld": [3, 11], "svmae": [3, 11], "error": [3, 4, 6, 7, 9, 11], "svmrae": [3, 11], "what": 3, "nowadai": 3, "consid": [3, 5, 8, 9, 10, 11], "behav": [3, 5], "If": [3, 5, 8, 10, 11], "want": [3, 4], "custom": [3, 6, 10], "modifi": [3, 8], "assign": [3, 10], "Then": 3, "re": [3, 4, 9, 10], "thing": 3, "your": 3, "svmperf_hom": 3, "valid_loss": [3, 9, 11], "mycustomloss": 3, "28": [3, 10], "current": [3, 8, 9, 10, 11], "support": [3, 6, 9, 10, 11], "oper": 3, "trivial": 3, "strategi": [3, 4], "2016": [3, 10, 11], "sentiment": [3, 6, 10], "19": [3, 10], "onevsal": [3, 11], "know": 3, "where": [3, 5, 8, 9, 10, 11], "top": [3, 8, 11], "thu": [3, 4, 5, 8, 9, 11], "nor": 3, "castano": [3, 10], "2019": [3, 10, 11], "dynam": [3, 9, 10, 11], "task": [3, 4, 10], "45": [3, 5, 10], "15": [3, 8, 10], "polici": [3, 11], "processor": 3, "av": [3, 11], "ptr": [3, 11], "member": [3, 11], "d": [3, 11], "static": [3, 11], "red_siz": [3, 11], "pleas": 3, "check": [3, 4, 8], "offer": [3, 6], "torch": [3, 9, 11], "embed": [3, 9, 11], "lstm": [3, 9, 11], "cnn": [3, 11], "its": [3, 4, 8, 9, 11], "layer": [3, 9, 11], "neuralclassifiertrain": [3, 9, 11], "cnnnet": [3, 9, 11], "vocabulary_s": [3, 9, 10, 11], "cuda": [3, 9, 11], "supervis": [4, 6], "strongli": [4, 5], "good": [4, 5], "choic": [4, 11], "hyper": [4, 8, 9], "wherebi": 4, "chosen": [4, 8], "pick": 4, "best": [4, 8, 9, 11], "being": [4, 8, 11], "criteria": 4, "solv": [4, 11], "assess": 4, "own": 4, "right": [4, 8, 10], "impos": [4, 8], "aim": [4, 5], "appropri": 4, "configur": [4, 8], "design": 4, "long": [4, 9], "regard": 4, "next": [4, 8, 9, 10], "section": 4, "argu": 4, "alejandro": 4, "fabrizio": 4, "count": [4, 5, 6, 8, 10, 11], 
"arxiv": 4, "preprint": 4, "2011": 4, "02552": 4, "2020": [4, 9], "varieti": 4, "exhibit": [4, 5], "degre": 4, "model_select": [4, 7, 11], "gridsearchq": [4, 8, 11], "grid": [4, 8, 11], "explor": [4, 8], "portion": 4, "param_grid": [4, 8, 11], "logspac": [4, 11], "class_weight": [4, 11], "eval_budget": 4, "refit": [4, 8], "retrain": [4, 9], "goe": 4, "end": [4, 8, 11], "best_params_": 4, "best_model_": 4, "101": 4, "5f": 4, "system": [4, 11], "start": 4, "hyperparam": 4, "0001": [4, 11], "got": [4, 11], "24987": 4, "48135": 4, "001": [4, 9, 11], "24866": 4, "100000": 4, "43676": 4, "finish": 4, "param": [4, 8, 9, 11], "19982": 4, "develop": [4, 6], "1010": 4, "5005": 4, "54it": 4, "20342": 4, "altern": 4, "computation": 4, "costli": 4, "try": 4, "theoret": 4, "suboptim": 4, "opt": 4, "gridsearchcv": [4, 11], "10000": 4, "5379": 4, "55it": 4, "41734": 4, "wors": [4, 5, 8], "larg": 4, "between": [4, 5, 6, 8, 9, 11], "modal": 4, "turn": 4, "better": 4, "nonetheless": 4, "happen": [4, 5], "basic": [5, 11], "help": 5, "analys": [5, 6], "outcom": 5, "main": 5, "method_nam": [5, 8, 11], "name": [5, 8, 9, 10, 11], "shape": [5, 8, 9, 10, 11], "correspond": [5, 10], "matrix": [5, 8, 11], "appear": 5, "occur": [5, 10], "merg": 5, "emq": [5, 11], "55": 5, "showcas": 5, "wide": 5, "variant": [5, 6, 8, 11], "linear": [5, 8, 11], "review": [5, 6, 10], "step": [5, 8], "05": [5, 8, 11], "gen_data": 5, "base_classifi": 5, "yield": [5, 8, 10, 11], "tr_prev": [5, 8, 11], "append": 5, "__class__": 5, "__name__": 5, "insight": 5, "view": 5, "y": [5, 8, 9, 10, 11], "axi": [5, 8], "against": 5, "x": [5, 8, 9, 10, 11], "unfortun": 5, "limit": [5, 8, 11], "binary_diagon": [5, 8], "train_prev": [5, 8], "savepath": [5, 8], "bin_diag": 5, "png": 5, "save": [5, 8], "pdf": [5, 11], "cyan": 5, "dot": [5, 8], "color": [5, 8], "band": [5, 8], "hidden": [5, 9, 11], "show_std": [5, 8], "unadjust": 5, "bias": 5, "toward": [5, 10], "seen": [5, 8, 11], "evinc": 5, "box": [5, 8], "binary_bias_glob": [5, 
8], "bin_bia": 5, "unbias": 5, "center": 5, "tend": 5, "overestim": 5, "high": [5, 8], "lower": [5, 11], "again": 5, "accordingli": 5, "20": [5, 8, 11], "90": [5, 8], "rewrit": 5, "method_data": 5, "training_preval": 5, "linspac": 5, "training_s": 5, "suffic": 5, "latex": 5, "syntax": 5, "_": [5, 8, 10], "now": 5, "clearli": 5, "binary_bias_bin": [5, 8], "broken": [5, 8], "down": [5, 8, 10], "bin": [5, 8, 11], "To": [5, 10], "nbin": [5, 8, 11], "isometr": [5, 8], "subinterv": 5, "interestingli": 5, "enough": 5, "seemingli": 5, "tendenc": 5, "low": [5, 8, 9], "underestim": 5, "beyond": 5, "67": [5, 8], "curios": 5, "pretti": 5, "discuss": 5, "analyz": 5, "compar": [5, 8], "both": 5, "irrespect": [5, 11], "harder": 5, "interpret": [5, 6, 11], "error_by_drift": [5, 8], "error_nam": [5, 8], "n_bin": [5, 8, 11], "err_drift": 5, "whenev": [5, 8], "clear": 5, "lowest": 5, "difficult": 5, "rememb": 5, "solid": 5, "comparison": 5, "detriment": 5, "visual": [5, 6], "hide": 5, "framework": [6, 11], "written": 6, "root": 6, "concept": 6, "baselin": 6, "integr": 6, "commonli": 6, "facilit": 6, "twitter": [6, 10], "true_preval": 6, "hold": [6, 8, 11], "endeavour": [6, 8], "popular": 6, "expect": [6, 11], "maxim": [6, 11], "hdy": [6, 11], "versatil": 6, "etc": 6, "uci": [6, 10], "nativ": 6, "loss": [6, 9, 11], "perf": [6, 9, 11], "ad": 6, "meta": [6, 8], "plot": [6, 7], "diagon": [6, 8], "bia": [6, 8, 9, 11], "drift": 6, "api": 6, "subpackag": 7, "submodul": 7, "util": [7, 9], "content": 7, "bctscalibr": 9, "nbvscalibr": 9, "recalibratedprobabilisticclassifi": 9, "recalibratedprobabilisticclassifierbas": 9, "classes_": [9, 10, 11], "fit_cv": 9, "fit_tr_val": 9, "tscalibr": 9, "vscalibr": 9, "lowranklogisticregress": 9, "document_embed": 9, "lstmnet": 9, "reset_net_param": 9, "textclassifiernet": 9, "dimens": [8, 9, 10, 11], "forward": [9, 11], "xavier_uniform": 9, "torchdataset": 9, "asdataload": 9, "decision_funct": 9, "splitstratifi": 10, "stat": 10, "train_test": 10, "xp": 10, 
"xy": 10, "split_random": 10, "split_stratifi": 10, "uniform_sampl": 10, "uniform_sampling_index": 10, "fetch_lequa2022": 10, "warn": 10, "indextransform": 10, "add_word": 10, "fit_transform": 10, "reader": 8, "binar": [8, 10], "from_csv": 10, "from_spars": 10, "from_text": 10, "reindex_label": 10, "getptecondestim": 11, "solve_adjust": 11, "adjustedclassifyandcount": 11, "distributionmatch": 11, "dy": 11, "em": 11, "max_it": 11, "explicitlossminimis": 11, "max": 11, "ms2": 11, "mediansweep": 11, "mediansweep2": 11, "probabilisticadjustedclassifyandcount": 11, "probabilisticclassifyandcount": 11, "smm": 11, "t50": 11, "thresholdoptim": 11, "cross_generate_predict": 11, "cross_generate_predictions_depr": 11, "binaryquantifi": 11, "onevsallgener": 11, "eacc": 11, "ecc": 11, "eemq": 11, "ehdi": 11, "epacc": 11, "valid_polici": 11, "ensemblefactori": 11, "get_probability_distribut": 11, "quanetmodul": 11, "quanettrain": 11, "clean_checkpoint": 11, "clean_checkpoint_dir": 11, "mae_loss": 11, "non_aggreg": 8, "maximumlikelihoodprevalenceestim": 11, "absolute_error": 8, "hat": 8, "frac": 8, "mathcal": 8, "sum_": 8, "acc_error": 8, "y_true": 8, "y_pred": 8, "tp": 8, "tn": 8, "fp": 8, "fn": 8, "stand": [8, 11], "f1_error": 8, "macro": 8, "f_1": 8, "harmon": 8, "recal": 8, "2tp": 8, "independ": [8, 11], "err_nam": 8, "p_hat": 8, "d_": 8, "kl": 8, "log": [8, 10], "factor": 8, "beforehand": 8, "n_sampl": [8, 9], "mean_absolute_error": 8, "mean_relative_absolute_error": 8, "relative_absolute_error": 8, "underlin": 8, "displaystyl": 8, "abstractprotocol": 8, "union": [8, 11], "aggr_speedup": 8, "auto": 8, "evaluation_report": 8, "app": [8, 11], "repeat": 8, "smooth_limits_epsilon": 8, "random_st": [8, 10], "return_typ": 8, "sample_prev": 8, "abstractstochasticseededprotocol": 8, "onlabelledcollectionprotocol": 8, "95": 8, "copi": [8, 10], "quantiti": 8, "labelled_collect": 8, "prevalence_grid": 8, "exhaust": 8, "sum": [8, 11], "implicit": 8, "return_constrained_dim": 8, "rest": 
[8, 9, 10, 11], "quit": 8, "obvious": 8, "determinist": 8, "anywher": 8, "multipli": 8, "necessari": 8, "samples_paramet": 8, "total": 8, "parent": 8, "sequenc": 8, "enforc": 8, "collat": 8, "arg": [8, 10, 11], "domainmix": 8, "domaina": 8, "domainb": 8, "mixture_point": 8, "domain": 8, "scale": [8, 9, 11], "npp": 8, "draw": 8, "uniformli": 8, "therefor": 8, "get_col": 8, "get_labelled_collect": 8, "on_preclassified_inst": 8, "pre_classif": 8, "in_plac": 8, "usimplexpp": 8, "kraemer": 8, "algorithm": [8, 11], "sens": 8, "guarante": [8, 10], "prefer": 8, "intract": 8, "hellingerdist": 8, "hellingh": 8, "distanc": [8, 11], "hd": [8, 11], "discret": [8, 11], "sqrt": 8, "p_i": 8, "q_i": 8, "real": [8, 9, 10, 11], "topsoedist": 8, "1e": [8, 9, 11], "topso": [8, 11], "adjusted_quantif": 8, "prevalence_estim": 8, "tpr": [8, 11], "fpr": [8, 11], "clip": 8, "exce": 8, "check_prevalence_vector": 8, "raise_except": 8, "toleranz": 8, "08": 8, "combinations_budget": 8, "largest": 8, "dimension": [8, 9, 10, 11], "repetit": 8, "less": [8, 10], "normalize_preval": 8, "l1": [8, 11], "calcul": 8, "binom": 8, "mass": 8, "alloc": [8, 9], "solut": 8, "star": 8, "bar": 8, "prevalence_from_label": 8, "n_instanc": [8, 9, 11], "correctli": 8, "even": 8, "len": 8, "prevalence_from_prob": 8, "bool": [8, 9, 11], "argmax": 8, "prevalence_linspac": 8, "01": [8, 9, 11], "separ": [8, 10], "99": 8, "uniform_prevalence_sampl": 8, "adapt": [8, 9], "post": 8, "http": [8, 10, 11], "stackexchang": 8, "com": 8, "question": 8, "3227": 8, "uniform": [8, 10], "uniform_simplex_sampl": 8, "dict": [8, 10, 11], "timeout": 8, "dictionari": [8, 9, 10, 11], "kei": [8, 10], "quantification_error": 8, "whether": [8, 9, 10, 11], "ignor": [8, 10, 11], "gen": 8, "establish": 8, "timer": 8, "longer": 8, "timeouterror": 8, "bound": [8, 11], "stdout": 8, "best_model": 8, "after": [8, 11], "minim": [8, 11], "routin": [8, 10, 11], "unus": [8, 9], "contanin": 8, "cross_val_predict": 8, "akin": [8, 11], "issu": 8, 
"reproduc": [8, 10], "pos_class": [8, 10], "titl": 8, "colormap": 8, "listedcolormap": 8, "vertical_xtick": 8, "legend": 8, "local": 8, "sign": 8, "minu": 8, "classs": 8, "compon": [8, 9, 11], "cm": 8, "tab10": 8, "secondari": 8, "global": 8, "method_ord": 8, "henc": [8, 10], "conveni": 8, "multiclass": [8, 10, 11], "inconveni": 8, "leyend": 8, "hightlight": 8, "associ": [8, 10], "brokenbar_supremacy_by_drift": 8, "isomer": 8, "x_error": 8, "y_error": 8, "ttest_alpha": 8, "005": 8, "tail_density_threshold": 8, "region": 8, "chart": 8, "condit": [8, 11], "ii": 8, "significantli": 8, "side": 8, "confid": 8, "percentil": 8, "divid": 8, "amount": 8, "similar": [8, 11], "threshold": [8, 11], "densiti": 8, "tail": 8, "discard": 8, "outlier": 8, "show_dens": 8, "show_legend": 8, "logscal": 8, "vline": 8, "especi": 8, "mai": 8, "cumberson": 8, "gain": 8, "understand": 8, "fare": 8, "regim": 8, "highlight": 8, "vertic": 8, "earlystop": 8, "patienc": [8, 9, 11], "lower_is_bett": 8, "earli": [8, 9, 11], "stop": [8, 9, 11], "epoch": [8, 9, 11], "best_epoch": 8, "best_scor": 8, "consecut": [8, 9, 11], "monitor": 8, "obtaind": 8, "far": [8, 9, 10], "flag": 8, "keep": 8, "track": 8, "boolean": [8, 10, 11], "create_if_not_exist": 8, "makedir": 8, "exist_ok": 8, "join": 8, "dir": 8, "subdir": 8, "anotherdir": 8, "create_parent_dir": 8, "exist": 8, "txt": 8, "download_fil": 8, "url": 8, "archive_filenam": 8, "destin": 8, "filenam": 8, "download_file_if_not_exist": 8, "dowload": 8, "get_quapy_hom": 8, "home": [8, 10], "perman": 8, "map_parallel": 8, "func": 8, "slice": 8, "item": 8, "wrapper": [8, 9, 10, 11], "multiprocess": 8, "delai": 8, "args_i": 8, "silent": [8, 11], "child": 8, "ensur": 8, "pickled_resourc": 8, "pickle_path": 8, "generation_func": 8, "fast": [8, 10], "resourc": 8, "some_arrai": 8, "mock": [8, 9], "rand": 8, "my_arrai": 8, "pkl": 8, "save_text_fil": 8, "disk": 8, "miss": 8, "temp_se": 8, "context": 8, "tempor": 8, "outer": 8, "state": 8, "within": [8, 11], 
"get_njob": [], "correct": [9, 11], "temperatur": [9, 11], "bct": [9, 11], "abstent": 9, "alexandari": [9, 11], "afterward": [9, 11], "No": [9, 11], "nbv": [9, 11], "baseestim": [9, 11], "calibratorfactori": 9, "n_compon": 9, "kwarg": [9, 10, 11], "decomposit": 9, "truncatedsvd": 9, "princip": 9, "regress": 9, "n_featur": 9, "length": [9, 10], "eventu": [9, 10], "unalt": 9, "emb": 9, "embedding_s": 9, "hidden_s": 9, "repr_siz": 9, "kernel_height": 9, "stride": 9, "pad": [9, 10], "drop_p": 9, "convolut": 9, "vocabulari": [9, 10], "kernel": 9, "drop": 9, "dropout": [9, 11], "batch": 9, "dataload": 9, "tensor": 9, "n_dimens": 9, "lstm_class_nlay": 9, "short": 9, "memori": 9, "net": 9, "weight_decai": 9, "batch_siz": 9, "64": [9, 11], "batch_size_test": 9, "512": [9, 11], "padding_length": 9, "checkpointpath": 9, "checkpoint": [9, 11], "classifier_net": 9, "weight": [9, 10], "decai": 9, "wait": 9, "enabl": 9, "gpu": [9, 11], "vocab_s": 9, "reiniti": 9, "trainer": 9, "disjoint": 9, "embed_s": 9, "nn": 9, "pad_length": 9, "xavier": 9, "shuffl": [9, 10], "longest": 9, "shorter": 9, "svmperf_bas": [9, 11], "classifiermixin": 9, "thorsten": 9, "refer": [9, 10], "svm_perf_learn": 9, "svm_perf_classifi": 9, "trade": 9, "off": 9, "margin": 9, "std": 9, "qacc": 9, "qf1": 9, "qgm": 9, "12": 9, "26": 9, "23": 9, "train_siz": 10, "conform": 10, "round": 10, "loader_kwarg": 10, "read": 10, "tupl": [10, 11], "tr": 10, "te": 10, "csr": 10, "csr_matrix": 10, "4403": 10, "my_collect": 10, "codefram": 10, "larger": [10, 11], "actual": [10, 11], "empti": 10, "met": 10, "whose": [10, 11], "train_prop": 10, "left": [8, 10], "stratif": 10, "greater": 10, "dataset_nam": 10, "data_hom": 10, "test_split": 10, "predefin": 10, "uci_dataset": 10, "dump": 10, "leav": 10, "quay_data": 10, "ml": 10, "5fcvx2": 10, "x2": 10, "offici": 10, "lequa": 10, "competit": 10, "t1a": 10, "t1b": 10, "t2a": 10, "t2b": 10, "raw": 10, "merchandis": 10, "sperduti": 10, "2022": 10, "overview": 10, "clef": 10, 
"lequa2022_experi": 10, "py": 10, "guid": 10, "val_gen": 10, "test_gen": 10, "samplesfromdir": 10, "minimun": 10, "kept": 10, "subsequ": 10, "mining6": 10, "devel": 10, "style": 10, "countvector": 10, "keyword": [10, 11], "nogap": 10, "regardless": 10, "codifi": 10, "unknown": 10, "surfac": 10, "assert": 10, "gap": 10, "preced": 10, "decid": 10, "uniqu": 10, "rare": 10, "unk": 10, "minimum": [10, 11], "occurr": 10, "org": [10, 11], "stabl": 10, "feature_extract": 10, "html": 10, "subtyp": 10, "spmatrix": 10, "remov": [10, 11], "infrequ": 10, "aka": [10, 11], "sublinear_tf": 10, "scall": 10, "counter": 10, "tfidfvector": 10, "whcih": 10, "had": 10, "encod": 10, "utf": 10, "csv": 10, "feat1": 10, "feat2": 10, "featn": 10, "covari": 10, "express": 10, "row": 10, "class2int": 10, "collet": 10, "fomart": 10, "progress": 10, "sentenc": 10, "classnam": 10, "u1": 10, "misclassif": 11, "n_classes_": [], "fit_classifi": 11, "bypass": 11, "y_": 11, "ptecondestim": 11, "prevs_estim": 11, "ax": 11, "entri": 11, "y_i": 11, "y_j": 11, "_posterior_probabilities_": 11, "attribut": 11, "subclass": 11, "give": 11, "outsid": 11, "unless": 11, "noth": 11, "els": 11, "cdf": 11, "match": 11, "helling": 11, "sought": 11, "channel": 11, "proper": 11, "ch": 11, "di": 11, "dij": 11, "fraction": 11, "th": 11, "tol": 11, "ternari": 11, "dl": 11, "doi": 11, "1145": 11, "3219819": 11, "3220059": 11, "histogram": 11, "toler": 11, "explicit": 11, "exact_train_prev": 11, "recalib": 11, "updat": 11, "likelihood": [9, 11], "mutual": 11, "recurs": 11, "until": 11, "converg": 11, "suggest": 11, "recalibr": 11, "reach": 11, "loop": 11, "cumul": 11, "unlabel": 11, "latter": 11, "forman": 11, "2006": 11, "2008": 11, "goal": 11, "bring": 11, "denomin": 11, "median": 11, "sweep": 11, "binary_quantifi": 11, "prevel": 11, "emploi": 11, "resp": 11, "subobject": 11, "nest": 11, "pipelin": 11, "__": 11, "simplif": 11, "2021": 11, "equival": 11, "cosest": 11, "heurist": 11, "choos": 11, "ground": 11, 
"complement": 11, "param_mod_sel": 11, "param_model_sel": 11, "min_po": 11, "max_sample_s": 11, "closest": 11, "preliminari": 11, "recomput": 11, "compat": 11, "l": 11, "base_quantifier_class": 11, "factori": 11, "common": 11, "doc_embedding_s": 11, "stats_siz": 11, "lstm_hidden_s": 11, "lstm_nlayer": 11, "ff_layer": 11, "1024": 11, "bidirect": 11, "qdrop_p": 11, "order_bi": 11, "cell": 11, "connect": 11, "ff": 11, "sort": 11, "doc_embed": 11, "doc_posterior": 11, "recip": 11, "care": 11, "regist": 11, "hook": 11, "n_epoch": 11, "tr_iter_per_poch": 11, "va_iter_per_poch": 11, "checkpointdir": 11, "checkpointnam": 11, "phase": 11, "anyth": 11, "truth": 11, "mlpe": 11, "lazi": 11, "put": 11, "assumpion": 11, "beat": [9, 11], "estimant": 11, "kundaj": 9, "shrikumar": 9, "novemb": 9, "232": 9, "pmlr": 9, "outpu": 9, "partit": 9, "ight": [], "valueerror": 8}, "objects": {"": [[8, 0, 0, "-", "quapy"]], "quapy": [[9, 0, 0, "-", "classification"], [10, 0, 0, "-", "data"], [8, 0, 0, "-", "error"], [8, 0, 0, "-", "evaluation"], [8, 0, 0, "-", "functional"], [11, 0, 0, "-", "method"], [8, 0, 0, "-", "model_selection"], [8, 0, 0, "-", "plot"], [8, 0, 0, "-", "protocol"], [8, 0, 0, "-", "util"]], "quapy.classification": [[9, 0, 0, "-", "calibration"], [9, 0, 0, "-", "methods"], [9, 0, 0, "-", "neural"], [9, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[9, 1, 1, "", "BCTSCalibration"], [9, 1, 1, "", "NBVSCalibration"], [9, 1, 1, "", "RecalibratedProbabilisticClassifier"], [9, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [9, 1, 1, "", "TSCalibration"], [9, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[9, 2, 1, "", "classes_"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "fit_cv"], [9, 3, 1, "", "fit_tr_val"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[9, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": 
[[9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural": [[9, 1, 1, "", "CNNnet"], [9, 1, 1, "", "LSTMnet"], [9, 1, 1, "", "NeuralClassifierTrainer"], [9, 1, 1, "", "TextClassifierNet"], [9, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[9, 2, 1, "", "device"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "reset_net_params"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[9, 3, 1, "", "dimensions"], [9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "forward"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict_proba"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"], [9, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[9, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[9, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[9, 3, 1, "", "decision_function"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "set_params"], [9, 4, 1, "", "valid_losses"]], "quapy.data": [[10, 0, 0, "-", "base"], [10, 0, 0, "-", "datasets"], [10, 0, 0, "-", "preprocessing"], [10, 0, 0, "-", "reader"]], "quapy.data.base": [[10, 1, 1, "", "Dataset"], [10, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[10, 3, 1, "", "SplitStratified"], [10, 2, 1, "", "binary"], [10, 2, 1, "", "classes_"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", 
"n_classes"], [10, 3, 1, "", "stats"], [10, 2, 1, "", "train_test"], [10, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[10, 2, 1, "", "X"], [10, 2, 1, "", "Xp"], [10, 2, 1, "", "Xy"], [10, 2, 1, "", "binary"], [10, 3, 1, "", "counts"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 2, 1, "", "p"], [10, 3, 1, "", "prevalence"], [10, 3, 1, "", "sampling"], [10, 3, 1, "", "sampling_from_index"], [10, 3, 1, "", "sampling_index"], [10, 3, 1, "", "split_random"], [10, 3, 1, "", "split_stratified"], [10, 3, 1, "", "stats"], [10, 3, 1, "", "uniform_sampling"], [10, 3, 1, "", "uniform_sampling_index"], [10, 2, 1, "", "y"]], "quapy.data.datasets": [[10, 5, 1, "", "fetch_UCIDataset"], [10, 5, 1, "", "fetch_UCILabelledCollection"], [10, 5, 1, "", "fetch_lequa2022"], [10, 5, 1, "", "fetch_reviews"], [10, 5, 1, "", "fetch_twitter"], [10, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[10, 1, 1, "", "IndexTransformer"], [10, 5, 1, "", "index"], [10, 5, 1, "", "reduce_columns"], [10, 5, 1, "", "standardize"], [10, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[10, 3, 1, "", "add_word"], [10, 3, 1, "", "fit"], [10, 3, 1, "", "fit_transform"], [10, 3, 1, "", "transform"], [10, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[10, 5, 1, "", "binarize"], [10, 5, 1, "", "from_csv"], [10, 5, 1, "", "from_sparse"], [10, 5, 1, "", "from_text"], [10, 5, 1, "", "reindex_labels"]], "quapy.error": [[8, 5, 1, "", "absolute_error"], [8, 5, 1, "", "acc_error"], [8, 5, 1, "", "acce"], [8, 5, 1, "", "ae"], [8, 5, 1, "", "f1_error"], [8, 5, 1, "", "f1e"], [8, 5, 1, "", "from_name"], [8, 5, 1, "", "kld"], [8, 5, 1, "", "mae"], [8, 5, 1, "", "mean_absolute_error"], [8, 5, 1, "", "mean_relative_absolute_error"], [8, 5, 1, "", "mkld"], [8, 5, 1, "", "mnkld"], [8, 5, 1, "", "mrae"], [8, 5, 1, "", "mse"], [8, 5, 1, "", "nkld"], [8, 5, 1, "", "rae"], [8, 5, 1, "", "relative_absolute_error"], [8, 5, 1, "", "se"], 
[8, 5, 1, "", "smooth"]], "quapy.evaluation": [[8, 5, 1, "", "evaluate"], [8, 5, 1, "", "evaluation_report"], [8, 5, 1, "", "prediction"]], "quapy.functional": [[8, 5, 1, "", "HellingerDistance"], [8, 5, 1, "", "TopsoeDistance"], [8, 5, 1, "", "adjusted_quantification"], [8, 5, 1, "", "check_prevalence_vector"], [8, 5, 1, "", "get_nprevpoints_approximation"], [8, 5, 1, "", "normalize_prevalence"], [8, 5, 1, "", "num_prevalence_combinations"], [8, 5, 1, "", "prevalence_from_labels"], [8, 5, 1, "", "prevalence_from_probabilities"], [8, 5, 1, "", "prevalence_linspace"], [8, 5, 1, "", "strprev"], [8, 5, 1, "", "uniform_prevalence_sampling"], [8, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[11, 0, 0, "-", "aggregative"], [11, 0, 0, "-", "base"], [11, 0, 0, "-", "meta"], [11, 0, 0, "-", "neural"], [11, 0, 0, "-", "non_aggregative"]], "quapy.method.aggregative": [[11, 1, 1, "", "ACC"], [11, 4, 1, "", "AdjustedClassifyAndCount"], [11, 1, 1, "", "AggregativeProbabilisticQuantifier"], [11, 1, 1, "", "AggregativeQuantifier"], [11, 1, 1, "", "CC"], [11, 4, 1, "", "ClassifyAndCount"], [11, 1, 1, "", "DistributionMatching"], [11, 1, 1, "", "DyS"], [11, 1, 1, "", "ELM"], [11, 1, 1, "", "EMQ"], [11, 4, 1, "", "ExpectationMaximizationQuantifier"], [11, 4, 1, "", "ExplicitLossMinimisation"], [11, 1, 1, "", "HDy"], [11, 4, 1, "", "HellingerDistanceY"], [11, 1, 1, "", "MAX"], [11, 1, 1, "", "MS"], [11, 1, 1, "", "MS2"], [11, 4, 1, "", "MedianSweep"], [11, 4, 1, "", "MedianSweep2"], [11, 1, 1, "", "OneVsAll"], [11, 1, 1, "", "PACC"], [11, 1, 1, "", "PCC"], [11, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [11, 4, 1, "", "ProbabilisticClassifyAndCount"], [11, 4, 1, "", "SLD"], [11, 1, 1, "", "SMM"], [11, 1, 1, "", "SVMAE"], [11, 1, 1, "", "SVMKLD"], [11, 1, 1, "", "SVMNKLD"], [11, 1, 1, "", "SVMQ"], [11, 1, 1, "", "SVMRAE"], [11, 1, 1, "", "T50"], [11, 1, 1, "", "ThresholdOptimization"], [11, 1, 1, "", "X"], [11, 5, 1, "", "cross_generate_predictions"], [11, 5, 1, 
"", "cross_generate_predictions_depr"]], "quapy.method.aggregative.ACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"], [11, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeProbabilisticQuantifier": [[11, 3, 1, "", "classify"]], "quapy.method.aggregative.AggregativeQuantifier": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 2, 1, "", "classifier"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.aggregative.CC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DistributionMatching": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DyS": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ELM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.EMQ": [[11, 3, 1, "", "EM"], [11, 4, 1, "", "EPSILON"], [11, 4, 1, "", "MAX_ITER"], [11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.OneVsAll": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "set_params"]], "quapy.method.aggregative.PACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.SMM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ThresholdOptimization": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.base": [[11, 1, 1, "", "BaseQuantifier"], [11, 1, 1, "", "BinaryQuantifier"], [11, 1, 1, "", "OneVsAllGeneric"]], 
"quapy.method.base.BaseQuantifier": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[11, 2, 1, "", "classes"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.meta": [[11, 5, 1, "", "EACC"], [11, 5, 1, "", "ECC"], [11, 5, 1, "", "EEMQ"], [11, 5, 1, "", "EHDy"], [11, 5, 1, "", "EPACC"], [11, 1, 1, "", "Ensemble"], [11, 5, 1, "", "ensembleFactory"], [11, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[11, 4, 1, "", "VALID_POLICIES"], [11, 2, 1, "", "aggregative"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 2, 1, "", "probabilistic"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.neural": [[11, 1, 1, "", "QuaNetModule"], [11, 1, 1, "", "QuaNetTrainer"], [11, 5, 1, "", "mae_loss"]], "quapy.method.neural.QuaNetModule": [[11, 2, 1, "", "device"], [11, 3, 1, "", "forward"], [11, 4, 1, "", "training"]], "quapy.method.neural.QuaNetTrainer": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "clean_checkpoint"], [11, 3, 1, "", "clean_checkpoint_dir"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[11, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.model_selection": [[8, 1, 1, "", "GridSearchQ"], [8, 5, 1, "", "cross_val_predict"]], "quapy.model_selection.GridSearchQ": [[8, 3, 1, "", "best_model"], [8, 3, 1, "", "fit"], [8, 3, 1, "", "get_params"], [8, 3, 1, "", "quantify"], [8, 3, 1, "", "set_params"]], "quapy.plot": [[8, 5, 1, "", "binary_bias_bins"], [8, 5, 1, "", "binary_bias_global"], [8, 5, 1, "", "binary_diagonal"], [8, 5, 1, "", "brokenbar_supremacy_by_drift"], [8, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[8, 1, 1, "", "APP"], [8, 1, 1, "", 
"AbstractProtocol"], [8, 1, 1, "", "AbstractStochasticSeededProtocol"], [8, 1, 1, "", "DomainMixer"], [8, 1, 1, "", "NPP"], [8, 1, 1, "", "OnLabelledCollectionProtocol"], [8, 1, 1, "", "USimplexPP"]], "quapy.protocol.APP": [[8, 3, 1, "", "prevalence_grid"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[8, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[8, 3, 1, "", "collator"], [8, 2, 1, "", "random_state"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.NPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[8, 4, 1, "", "RETURN_TYPES"], [8, 3, 1, "", "get_collator"], [8, 3, 1, "", "get_labelled_collection"], [8, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.USimplexPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.util": [[8, 1, 1, "", "EarlyStop"], [8, 5, 1, "", "create_if_not_exist"], [8, 5, 1, "", "create_parent_dir"], [8, 5, 1, "", "download_file"], [8, 5, 1, "", "download_file_if_not_exists"], [8, 5, 1, "", "get_quapy_home"], [8, 5, 1, "", "map_parallel"], [8, 5, 1, "", "parallel"], [8, 5, 1, "", "pickled_resource"], [8, 5, 1, "", "save_text_file"], [8, 5, 1, "", "temp_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"dataset": [0, 10], "review": 0, "twitter": 0, "sentiment": 0, "uci": 0, "machin": 
0, "learn": 0, "issu": 0, "ad": 0, "custom": 0, "data": [0, 10], "process": 0, "evalu": [1, 8], "error": [1, 5, 8], "measur": 1, "protocol": [1, 8], "instal": 2, "requir": 2, "svm": 2, "perf": 2, "quantif": [2, 3, 4, 5], "orient": [2, 4], "loss": [2, 3, 4], "method": [3, 9, 11], "aggreg": [3, 11], "The": 3, "classifi": 3, "count": 3, "variant": 3, "expect": 3, "maxim": 3, "emq": 3, "helling": 3, "distanc": 3, "y": 3, "hdy": 3, "explicit": 3, "minim": 3, "meta": [3, 11], "model": [3, 4], "ensembl": 3, "quanet": 3, "neural": [3, 9, 11], "network": 3, "select": 4, "target": 4, "classif": [4, 9], "plot": [5, 8], "diagon": 5, "bia": 5, "drift": 5, "welcom": 6, "quapi": [6, 7, 8, 9, 10, 11], "": 6, "document": 6, "introduct": 6, "A": 6, "quick": 6, "exampl": 6, "featur": 6, "content": [6, 8, 9, 10, 11], "indic": 6, "tabl": 6, "packag": [8, 9, 10, 11], "subpackag": 8, "submodul": [8, 9, 10, 11], "function": 8, "model_select": 8, "util": 8, "modul": [8, 9, 10, 11], "calibr": 9, "svmperf": 9, "base": [10, 11], "preprocess": 10, "reader": 10, "non_aggreg": 11}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Datasets": [[0, "datasets"]], "Reviews Datasets": [[0, "reviews-datasets"]], "Twitter Sentiment Datasets": [[0, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[0, "uci-machine-learning"]], "Issues:": [[0, "issues"]], "Adding Custom Datasets": [[0, "adding-custom-datasets"]], "Data Processing": [[0, "data-processing"]], "Evaluation": [[1, "evaluation"]], "Error Measures": [[1, "error-measures"]], "Evaluation Protocols": [[1, "evaluation-protocols"]], "Installation": [[2, "installation"]], "Requirements": [[2, "requirements"]], "SVM-perf with quantification-oriented losses": [[2, 
"svm-perf-with-quantification-oriented-losses"]], "Quantification Methods": [[3, "quantification-methods"]], "Aggregative Methods": [[3, "aggregative-methods"]], "The Classify & Count variants": [[3, "the-classify-count-variants"]], "Expectation Maximization (EMQ)": [[3, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[3, "hellinger-distance-y-hdy"]], "Explicit Loss Minimization": [[3, "explicit-loss-minimization"]], "Meta Models": [[3, "meta-models"]], "Ensembles": [[3, "ensembles"]], "The QuaNet neural network": [[3, "the-quanet-neural-network"]], "Model Selection": [[4, "model-selection"]], "Targeting a Quantification-oriented loss": [[4, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[4, "targeting-a-classification-oriented-loss"]], "Plotting": [[5, "plotting"]], "Diagonal Plot": [[5, "diagonal-plot"]], "Quantification bias": [[5, "quantification-bias"]], "Error by Drift": [[5, "error-by-drift"]], "Welcome to QuaPy\u2019s documentation!": [[6, "welcome-to-quapy-s-documentation"]], "Introduction": [[6, "introduction"]], "A quick example:": [[6, "a-quick-example"]], "Features": [[6, "features"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "quapy": [[7, "quapy"]], "Submodules": [[9, "submodules"], [8, "submodules"], [10, "submodules"], [11, "submodules"]], "Module contents": [[9, "module-quapy.classification"], [8, "module-quapy"], [10, "module-quapy.data"], [11, "module-quapy.method"]], "quapy.classification package": [[9, "quapy-classification-package"]], "quapy.classification.calibration": [[9, "quapy-classification-calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "quapy package": [[8, "quapy-package"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, 
"module-quapy.evaluation"]], "quapy.protocol": [[8, "quapy-protocol"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.util": [[8, "module-quapy.util"]], "Subpackages": [[8, "subpackages"]], "quapy.data package": [[10, "quapy-data-package"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "quapy.method package": [[11, "quapy-method-package"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[8, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol"]], "domainmixer (class in quapy.protocol)": [[8, "quapy.protocol.DomainMixer"]], "earlystop (class in quapy.util)": [[8, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[8, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[8, "quapy.functional.HellingerDistance"]], "npp (class in quapy.protocol)": [[8, "quapy.protocol.NPP"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[8, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "topsoedistance() (in module quapy.functional)": [[8, 
"quapy.functional.TopsoeDistance"]], "usimplexpp (class in quapy.protocol)": [[8, "quapy.protocol.USimplexPP"]], "absolute_error() (in module quapy.error)": [[8, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[8, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[8, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[8, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[8, "quapy.error.ae"]], "best_model() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[8, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[8, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[8, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], 
"f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], 
"mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, 
"quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], 
"uniform_prevalence_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], "fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": 
[[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection 
method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in 
module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "elm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ELM"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "explicitlossminimisation (in module quapy.method.aggregative)": [[11, 
"quapy.method.aggregative.ExplicitLossMinimisation"]], "hdy (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAll"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "svmae (class in quapy.method.aggregative)": 
[[11, "quapy.method.aggregative.SVMAE"]], "svmkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMKLD"]], "svmnkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMNKLD"]], "svmq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMQ"]], "svmrae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMRAE"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], 
"aggregate() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.aggregative.onevsall property)": [[11, "quapy.method.aggregative.OneVsAll.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() (quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.classify"]], "classify() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module 
quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, 
"quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.get_params"]], "get_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() 
(quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.set_params"]], "set_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file diff --git a/examples/custom_quantifier.py b/examples/custom_quantifier.py new file mode 100644 index 0000000..a025b87 --- /dev/null +++ b/examples/custom_quantifier.py @@ -0,0 +1,69 @@ +import quapy as qp +from data import LabelledCollection +from method.base import BaseQuantifier, BinaryQuantifier +from model_selection import GridSearchQ +from quapy.method.aggregative import PACC, AggregativeProbabilisticQuantifier +from quapy.protocol import APP +import numpy as np +from sklearn.linear_model import LogisticRegression + + +# Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a +# 
logistic regressor for generating posterior probabilities, and then applies a custom threshold value to the +# posteriors. Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it +# relies on posterior probabilities, then it is a probabilistic aggregative quantifier. Note also it has an +# internal hyperparameter (let say, alpha) which is the decision threshold. Let's also assume the quantifier +# is binary, for simplicity. + +class MyQuantifier(AggregativeProbabilisticQuantifier, BinaryQuantifier): + def __init__(self, classifier, alpha=0.5): + self.alpha = alpha + # aggregative quantifiers have an internal self.classifier attribute + self.classifier = classifier + + def fit(self, data: LabelledCollection, fit_classifier=True): + assert fit_classifier, 'this quantifier needs to fit the classifier!' + self.classifier.fit(*data.Xy) + return self + + # in general, we would need to implement the method quantify(self, instances) but, since this method is of + # type aggregative, we can simply implement the method aggregate, which has the following interface + def aggregate(self, classif_predictions: np.ndarray): + # the posterior probabilities have already been generated by the quantify method; we only need to + # specify what to do with them + positive_probabilities = classif_predictions[:, 1] + crisp_decisions = positive_probabilities > self.alpha + pos_prev = crisp_decisions.mean() + neg_prev = 1-pos_prev + return np.asarray([neg_prev, pos_prev]) + + +if __name__ == '__main__': + + qp.environ['SAMPLE_SIZE'] = 100 + + # define an instance of our custom quantifier + quantifier = MyQuantifier(LogisticRegression(), alpha=0.5) + + # load the IMDb dataset + train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test + train, val = train.split_stratified(train_prop=0.75) + + # model selection + # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier + 
param_grid = { + 'alpha': np.linspace(0,1,11), # quantifier-dependent hyperparameter + 'classifier__C': np.logspace(-2,2,5) # classifier-dependent hyperparameter + } + quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train) + + # evaluation + mae = qp.evaluation.evaluate(quantifier, protocol=APP(test), error_metric='mae') + + print(f'MAE = {mae:.4f}') + + # final remarks: this method is only for demonstration purposes and makes little sense in general. The method relies + # on a hyperparameter alpha for binarizing the posterior probabilities. A much better way for fulfilling this + # goal would be to calibrate the classifier (LogisticRegression is already reasonably well calibrated) and then + # simply cut at 0.5. + diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index f2deea0..3fb21f6 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -1,11 +1,19 @@ -# main changes in 0.1.7 +Change Log 0.1.7 +--------------------- -- Protocols are now abstracted as AbstractProtocol. There is a new class extending AbstractProtocol called +- Protocols are now abstracted as instances of AbstractProtocol. There is a new class extending AbstractProtocol called AbstractStochasticSeededProtocol, which implements a seeding policy to allow replicate the series of samplings. - There are some examples of protocols, APP, NPP, USimplexPP, CovariateShiftPP (experimental). + There are some examples of protocols, APP, NPP, USimplexPP, DomainMixer (experimental). The idea is to start the sampling by simply calling the __call__ method. This change has a great impact in the framework, since many functions in qp.evaluation, qp.model_selection, - and sampling functions in LabelledCollection make use of the old functions. + and sampling functions in LabelledCollection relied on the old functions.
E.g., the functionality of + qp.evaluation.artificial_prevalence_report or qp.evaluation.natural_prevalence_report is now obtained by means of + qp.evaluation.report which takes a protocol as an argument. I have not maintained compatibility with the old + interfaces because I did not really like them. Check the wiki guide and the examples for more details. + + check guides + + check examples - ACC, PACC, Forman's threshold variants have been parallelized. @@ -51,47 +59,31 @@ multiclass quantification. That is to say, one could get a multiclass variant of the (originally binary) HDy method aligned with the Firat's formulation. +- internal method properties "binary", "aggregative", and "probabilistic" have been removed; these conditions are + checked via isinstance + +- quantifiers (i.e., classes that inherit from BaseQuantifier) are not forced to implement classes_ or n_classes; + these can be used anyway internally, but the framework will not suppose (nor impose) that a quantifier implements + them + +- qp.evaluation.prediction has been optimized so that, if a quantifier is of type aggregative, and if the evaluation + protocol is of type OnLabelledCollection, then the computation is faster. In this specific case, the predictions + are issued only once and for all, and not for each sample. An exception to this (which is also implemented) is + when the number of instances across all samples is anyway smaller than the number of instances in the original + labelled collection; in this case the heuristic is of no help, and is therefore not applied. + +- the distinction between "classify" and "posterior_probabilities" has been removed in Aggregative quantifiers, + so that probabilistic classifiers return posterior probabilities, while non-probabilistic quantifiers + return crisp decisions. + Things to fix: -- calibration with recalibration methods has to be fixed for exact_train_prev in EMQ (conflicts with clone, deepcopy, etc.)
-- clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance(): - this is not working; I don't know how to make the isinstance work. Looks like there is some problem with the - path of the imported class wrt the path of the class that arrives from another module... -- clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only - internally and not imposed in any abstract class) -- optimize "qp.evaluation.prediction" for aggregative methods (pre-classification) +-------------- +- OneVsAll is duplicated (in aggregative and in general), and is not well documented. It is not working either. + Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll - update unit tests -- Policies should be able to set their output to "labelled_collection" or "instances_prevalence" or something similar. -- Policies should implement the "gen()" one, taking a reader function as an input, and a folder path maybe -- Review all documentation, redo the Sphinx doc, update Wikis... +- update Wikis... - Resolve the OneVsAll thing (it is in base.py and in aggregative.py) -- Better handle the environment (e.g., with n_jobs) -- test cross_generate_predictions and cancel cross_generate_predictions_depr - Add a proper log? -- test LoadSamplesFromDirectory (in protocols.py) -- improve plots? -- I have removed the distinction between "classify" and "posterior_probabilities" in the Aggregative quantifiers, - so that probabilistic classifiers actually return posterior probabilities, while non-probabilistic quantifiers - return instead crisp decisions. The idea was to unify the quantification function (i.e., now it is always - classify & aggregate, irrespective of the class). However, this has caused a problem with OneVsAll. This has to - be checked, since it is now innecessarily complicated (it also has old references to .probabilistic, and all this - stuff). 
-- Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll +- improve plots +- documentation of protocols is incomplete -New features: -- Add LeQua2022 to datasets (everything automatic, and with proper protocols "gen") -- Add an "experimental room", with scripts to quickly test new ideas and see results. - -# 0.1.7 -# change the LabelledCollection API (removing protocol-related samplings) -# need to change the two references to the above in the wiki / doc, and code examples... -# removed artificial_prevalence_sampling from functional - -# also: some parameters in the init could be used to indicate that the method should return a tuple with -# unlabelled instances and the vector of prevalence values (and not a LabelledCollection). -# Or: this can be done in a different function; i.e., we use one function (now __call__) to return -# LabelledCollections, and another new one for returning the other output, which is more general for -# evaluation purposes. - -# the so-called "gen" function has to be implemented as a protocol. The problem here is that this function -# should be able to return only unlabelled instances plus a vector of prevalences (and not LabelledCollections). -# This was coded as different functions in 0.1.6 diff --git a/quapy/__init__.py b/quapy/__init__.py index 54b1603..47a7388 100644 --- a/quapy/__init__.py +++ b/quapy/__init__.py @@ -23,9 +23,28 @@ environ = { } -def get_njobs(n_jobs): +def _get_njobs(n_jobs): + """ + If `n_jobs` is None, then it returns `environ['N_JOBS']`; if otherwise, returns `n_jobs`. + + :param n_jobs: the number of `n_jobs` or None if not specified + :return: int + """ return environ['N_JOBS'] if n_jobs is None else n_jobs +def _get_sample_size(sample_size): + """ + If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; if otherwise, returns `sample_size`. + If none of these are set, then a ValueError exception is raised. 
+ + :param sample_size: integer or None + :return: int + """ + sample_size = environ['SAMPLE_SIZE'] if sample_size is None else sample_size + if sample_size is None: + raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified') + return sample_size + diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index 69a7e14..f35bb97 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -12,12 +12,18 @@ import numpy as np class RecalibratedProbabilisticClassifier: + """ + Abstract class for (re)calibration method from `abstention.calibration`, as defined in + `Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration + is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR. + `_: + """ pass class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabilisticClassifier): """ - Applies a (re)calibration method from abstention.calibration, as defined in + Applies a (re)calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier @@ -25,7 +31,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None :param verbose: whether or not to display information in the standard output """ @@ -38,6 +44,13 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi self.verbose = verbose def fit(self, X, y): + """ + Fits the calibration for the probabilistic classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ k = self.val_split if isinstance(k, int): if k < 2: @@ -49,6 +62,15 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self.fit_cv(X, y) def fit_cv(self, X, y): + """ + Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all + training instances via cross-validation, and then retrains the classifier on all training instances. + The posterior probabilities thus generated are used for calibrating the outputs of the classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ posteriors = cross_val_predict( self.classifier, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method='predict_proba' ) @@ -58,6 +80,16 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self def fit_tr_val(self, X, y): + """ + Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a + training and a validation set, and then uses the training samples to learn a classifier which is then used + to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate + the classifier. The classifier is not retrained on the whole dataset.
+ + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=self.val_split, stratify=y) self.classifier.fit(Xtr, ytr) posteriors = self.classifier.predict_proba(Xva) @@ -66,32 +98,49 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self def predict(self, X): + """ + Predicts class labels for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples,)` with the class label predictions + """ return self.classifier.predict(X) def predict_proba(self, X): + """ + Generates posterior probabilities for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities + """ posteriors = self.classifier.predict_proba(X) return self.calibration_function(posteriors) @property def classes_(self): + """ + Returns the classes on which the classifier has been trained on + + :return: array-like of shape `(n_classes)` + """ return self.classifier.classes_ class NBVSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the No-Bias Vector Scaling (NBVS) calibration method from abstention.calibration, as defined in + Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). 
In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = NoBiasVectorScaling(verbose=verbose) self.val_split = val_split @@ -101,19 +150,19 @@ class NBVSCalibration(RecalibratedProbabilisticClassifierBase): class BCTSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in + Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = TempScaling(verbose=verbose, bias_positions='all') self.val_split = val_split @@ -123,19 +172,19 @@ class BCTSCalibration(RecalibratedProbabilisticClassifierBase): class TSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in + Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = TempScaling(verbose=verbose) self.val_split = val_split @@ -145,19 +194,19 @@ class TSCalibration(RecalibratedProbabilisticClassifierBase): class VSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in + Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = VectorScaling(verbose=verbose) self.val_split = val_split diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py index 2f6ad90..176b102 100644 --- a/quapy/classification/svmperf.py +++ b/quapy/classification/svmperf.py @@ -94,6 +94,7 @@ class SVMperf(BaseEstimator, ClassifierMixin): def predict(self, X): """ Predicts labels for the instances `X` + :param X: array-like of shape `(n_samples, n_features)` instances to classify :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of instances in `X` diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index b35343b..241cd04 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -554,7 +554,31 @@ def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float): def fetch_lequa2022(task, data_home=None): """ + Loads the official datasets provided for the `LeQua `_ competition. + In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification + problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead. + Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification + problems consisting of estimating the class prevalence values of 28 different merchandise products. + We refer to the `Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022). + A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify. + `_ for a detailed description + of the tasks and datasets. + + The datasets are downloaded only once, and stored for fast reuse.
+ + See `lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these + datasets. + + + :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B + :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default + ~/quapy_data/ directory) + :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of + :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of + :class:`quapy.protocol.SamplesFromDir`, i.e., are sampling protocols that return a series of samples + labelled by prevalence. """ + from quapy.data._lequa2022 import load_raw_documents, load_vector_documents, SamplesFromDir assert task in LEQUA2022_TASKS, \ diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py index a987900..e65ccf7 100644 --- a/quapy/data/preprocessing.py +++ b/quapy/data/preprocessing.py @@ -88,7 +88,7 @@ def standardize(dataset: Dataset, inplace=False): :param dataset: a :class:`quapy.data.base.Dataset` object :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new :class:`quapy.data.base.Dataset` is to be returned - :return: + :return: an instance of :class:`quapy.data.base.Dataset` """ s = StandardScaler(copy=not inplace) training = s.fit_transform(dataset.training.instances) @@ -110,7 +110,7 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs): :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default) :param kwargs: the rest of parameters of the transformation (as for sklearn's - `CountVectorizer _`) + `CountVectorizer _`) :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer
values representing indices. """ @@ -147,7 +147,8 @@ class IndexTransformer: contains, and that would be generated by sklearn's `CountVectorizer `_ - :param kwargs: keyworded arguments from `CountVectorizer `_ + :param kwargs: keyworded arguments from + `CountVectorizer `_ """ def __init__(self, **kwargs): @@ -179,7 +180,7 @@ class IndexTransformer: """ # given the number of tasks and the number of jobs, generates the slices for the parallel processes assert self.unk != -1, 'transform called before fit' - n_jobs = qp.get_njobs(n_jobs) + n_jobs = qp._get_njobs(n_jobs) indexed = map_parallel(func=self._index, args=X, n_jobs=n_jobs) return np.asarray(indexed) diff --git a/quapy/depr_evaluation.py b/quapy/depr_evaluation.py deleted file mode 100644 index 0846ab0..0000000 --- a/quapy/depr_evaluation.py +++ /dev/null @@ -1,439 +0,0 @@ -from typing import Union, Callable, Iterable -import numpy as np -from tqdm import tqdm -import inspect - -import quapy as qp -from quapy.data import LabelledCollection -from quapy.method.base import BaseQuantifier -from quapy.util import temp_seed -import quapy.functional as F -import pandas as pd - - -def artificial_prevalence_prediction( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - verbose=False): - """ - Performs the predictions for all samples generated according to the Artificial Prevalence Protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. 
- - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(n_prevpoints*repeats)` and `n` the number of classes. 
The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - - n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, repeats, verbose) - - with temp_seed(random_seed): - indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, repeats)) - - return _predict_from_indexes(indexes, model, test, n_jobs, verbose) - - -def natural_prevalence_prediction( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats, - n_jobs=1, - random_seed=42, - verbose=False): - """ - Performs the predictions for all samples generated according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(repeats)` and `n` the number of classes. 
The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - - with temp_seed(random_seed): - indexes = list(test.natural_sampling_index_generator(sample_size, repeats)) - - return _predict_from_indexes(indexes, model, test, n_jobs, verbose) - - -def gen_prevalence_prediction(model: BaseQuantifier, gen_fn: Callable, eval_budget=None): - """ - Generates prevalence predictions for a custom protocol defined as a generator function that yields - samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None` - or up to the `eval_budget` iterations if specified. - - :param model: the model in charge of generating the class prevalence estimations - :param gen_fn: a generator function yielding one sample at each iteration - :param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the - entire sequence - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - generated and `n` the number of classes. 
The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - if not inspect.isgenerator(gen_fn()): - raise ValueError('param "gen_fun" is not a callable returning a generator') - - if not isinstance(eval_budget, int): - eval_budget = -1 - - true_prevalences, estim_prevalences = [], [] - for sample_instances, true_prev in gen_fn(): - true_prevalences.append(true_prev) - estim_prevalences.append(model.quantify(sample_instances)) - eval_budget -= 1 - if eval_budget == 0: - break - - true_prevalences = np.asarray(true_prevalences) - estim_prevalences = np.asarray(estim_prevalences) - - return true_prevalences, estim_prevalences - - -def _predict_from_indexes( - indexes, - model: BaseQuantifier, - test: LabelledCollection, - n_jobs=1, - verbose=False): - - if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier): - # print('\tinstance of aggregative-quantifier') - quantification_func = model.aggregate - if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier): - # print('\t\tinstance of probabilitstic-aggregative-quantifier') - preclassified_instances = model.posterior_probabilities(test.instances) - else: - # print('\t\tinstance of hard-aggregative-quantifier') - preclassified_instances = model.classify(test.instances) - test = LabelledCollection(preclassified_instances, test.labels) - else: - # print('\t\tinstance of base-quantifier') - quantification_func = model.quantify - - def _predict_prevalences(index): - sample = test.sampling_from_index(index) - true_prevalence = sample.prevalence() - estim_prevalence = quantification_func(sample.instances) - return true_prevalence, estim_prevalence - - pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes - results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs) - - true_prevalences, estim_prevalences = 
zip(*results) - true_prevalences = np.asarray(true_prevalences) - estim_prevalences = np.asarray(estim_prevalences) - - return true_prevalences, estim_prevalences - - -def artificial_prevalence_report( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - error_metrics:Iterable[Union[str,Callable]]='mae', - verbose=False): - """ - Generates an evaluation report for all samples generated according to the Artificial Prevalence Protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. - Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. 
For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function; optionally, a list of strings or callables can be indicated, if the results - are to be evaluated with more than one error metric. Default is "mae" - :param verbose: if True, shows a progress bar - :return: pandas' dataframe with rows corresponding to different samples, and with columns informing of the - true prevalence values, the estimated prevalence values, and the score obtained by each of the evaluation - measures indicated. - """ - - true_prevs, estim_prevs = artificial_prevalence_prediction( - model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose - ) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def natural_prevalence_report( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats=1, - n_jobs=1, - random_seed=42, - error_metrics:Iterable[Union[str,Callable]]='mae', - verbose=False): - """ - Generates an evaluation report for all samples generated according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. 
- Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function; optionally, a list of strings or callables can be indicated, if the results - are to be evaluated with more than one error metric. Default is "mae" - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(repeats)` and `n` the number of classes. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - - """ - - true_prevs, estim_prevs = natural_prevalence_prediction( - model, test, sample_size, repeats, n_jobs, random_seed, verbose - ) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def gen_prevalence_report(model: BaseQuantifier, gen_fn: Callable, eval_budget=None, - error_metrics:Iterable[Union[str,Callable]]='mae'): - """ - GGenerates an evaluation report for a custom protocol defined as a generator function that yields - samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None` - or up to the `eval_budget` iterations if specified. 
- Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param gen_fn: a generator function yielding one sample at each iteration - :param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the - entire sequence - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - generated. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - true_prevs, estim_prevs = gen_prevalence_prediction(model, gen_fn, eval_budget) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def _prevalence_report( - true_prevs, - estim_prevs, - error_metrics: Iterable[Union[str, Callable]] = 'mae'): - - if isinstance(error_metrics, str): - error_metrics = [error_metrics] - - error_names = [e if isinstance(e, str) else e.__name__ for e in error_metrics] - error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics] - assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions' - - df = pd.DataFrame(columns=['true-prev', 'estim-prev'] + error_names) - for true_prev, estim_prev in zip(true_prevs, estim_prevs): - series = {'true-prev': true_prev, 'estim-prev': estim_prev} - for error_name, error_metric in zip(error_names, error_funcs): - score = error_metric(true_prev, estim_prev) - series[error_name] = score - df = df.append(series, ignore_index=True) - - return df - - -def artificial_prevalence_protocol( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - 
error_metric:Union[str,Callable]='mae', - verbose=False): - """ - Generates samples according to the Artificial Prevalence Protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. 
The seed is local to the method and does not affect - any other random process (default 42) - :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function - :param verbose: set to True (default False) for displaying some information on standard output - :return: yields one sample at a time - """ - - if isinstance(error_metric, str): - error_metric = qp.error.from_name(error_metric) - - assert hasattr(error_metric, '__call__'), 'invalid error function' - - true_prevs, estim_prevs = artificial_prevalence_prediction( - model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose - ) - - return error_metric(true_prevs, estim_prevs) - - -def natural_prevalence_protocol( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats=1, - n_jobs=1, - random_seed=42, - error_metric:Union[str,Callable]='mae', - verbose=False): - """ - Generates samples according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. 
The seed is local to the method and does not affect - any other random process (default 42) - :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function - :param verbose: if True, shows a progress bar - :return: yields one sample at a time - """ - - if isinstance(error_metric, str): - error_metric = qp.error.from_name(error_metric) - - assert hasattr(error_metric, '__call__'), 'invalid error function' - - true_prevs, estim_prevs = natural_prevalence_prediction( - model, test, sample_size, repeats, n_jobs, random_seed, verbose - ) - - return error_metric(true_prevs, estim_prevs) - - -def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], error_metric:Union[str, Callable], n_jobs:int=-1): - """ - Evaluates a model on a sequence of test samples in terms of a given error metric. - - :param model: the model in charge of generating the class prevalence estimations - :param test_samples: an iterable yielding one sample at a time - :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :return: the score obtained using `error_metric` - """ - if isinstance(error_metric, str): - error_metric = qp.error.from_name(error_metric) - scores = qp.util.parallel(_delayed_eval, ((model, Ti, error_metric) for Ti in test_samples), n_jobs=n_jobs) - return np.mean(scores) - - -def _delayed_eval(args): - model, test, error = args - prev_estim = model.quantify(test.instances) - prev_true = test.prevalence() - return error(prev_true, prev_estim) - - -def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, repeats=1, verbose=False): - if n_prevpoints is None and eval_budget is None: - raise ValueError('either n_prevpoints or eval_budget has to be specified') - elif n_prevpoints is None: - assert eval_budget > 0, 'eval_budget must be a 
positive integer' - n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats) - eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) - if verbose: - print(f'setting n_prevpoints={n_prevpoints} so that the number of ' - f'evaluations ({eval_computations}) does not exceed the evaluation ' - f'budget ({eval_budget})') - elif eval_budget is None: - eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) - if verbose: - print(f'{eval_computations} evaluations will be performed for each ' - f'combination of hyper-parameters') - else: - eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) - if eval_computations > eval_budget: - n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats) - new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) - if verbose: - print(f'the budget of evaluations would be exceeded with ' - f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={n_prevpoints}. This will produce ' - f'{new_eval_computations} evaluation computations for each hyper-parameter combination.') - return n_prevpoints, eval_computations - diff --git a/quapy/error.py b/quapy/error.py index 2047929..c0cd157 100644 --- a/quapy/error.py +++ b/quapy/error.py @@ -11,11 +11,6 @@ def from_name(err_name): """ assert err_name in ERROR_NAMES, f'unknown error {err_name}' callable_error = globals()[err_name] - # if err_name in QUANTIFICATION_ERROR_SMOOTH_NAMES: - # eps = __check_eps() - # def bound_callable_error(y_true, y_pred): - # return callable_error(y_true, y_pred, eps) - # return bound_callable_error return callable_error diff --git a/quapy/functional.py b/quapy/functional.py index 3ee46ff..a1f0ba2 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -70,7 +70,7 @@ def HellingerDistance(P, Q): The HD for two discrete distributions of `k` bins is defined as: .. 
math:: - HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 } + HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 } :param P: real-valued array-like of shape `(k,)` representing a discrete distribution :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution @@ -78,11 +78,21 @@ def HellingerDistance(P, Q): """ return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2)) + def TopsoeDistance(P, Q, epsilon=1e-20): - """ Topsoe """ - return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + - Q*np.log((2*Q+epsilon)/(P+Q+epsilon))) + Topsoe distance between two (discretized) distributions `P` and `Q`. + The Topsoe distance for two discrete distributions of `k` bins is defined as: + + .. math:: + Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) + + q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right) + + :param P: real-valued array-like of shape `(k,)` representing a discrete distribution + :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution + :return: float + """ + return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon))) def uniform_prevalence_sampling(n_classes, size=1): @@ -136,7 +146,6 @@ def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True): .. math:: ACC(p) = \\frac{ p - fpr }{ tpr - fpr } - :param prevalence_estim: float, the estimated value for the positive class :param tpr: float, the true positive rate of the classifier :param fpr: float, the false positive rate of the classifier @@ -184,7 +193,7 @@ def __num_prevalence_combinations_depr(n_prevpoints:int, n_classes:int, n_repeat :param n_prevpoints: integer, number of prevalence points. :param n_repeats: integer, number of repetitions for each prevalence combination :return: The number of possible combinations. 
For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the - number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] + number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] """ __cache={} def __f(nc,np): @@ -216,7 +225,7 @@ def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1 :param n_prevpoints: integer, number of prevalence points. :param n_repeats: integer, number of repetitions for each prevalence combination :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the - number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] + number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] """ N = n_prevpoints-1 C = n_classes @@ -230,7 +239,7 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe that the number of valid prevalence values generated as combinations of prevalence points (points in a `n_classes`-dimensional simplex) do not exceed combinations_budget. - :param combinations_budget: integer, maximum number of combinatios allowed + :param combinations_budget: integer, maximum number of combinations allowed :param n_classes: integer, number of classes :param n_repeats: integer, number of repetitions for each prevalence combination :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences @@ -248,6 +257,7 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08): """ Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1. 
+ :param p: the prevalence vector to check :return: True if `p` is valid, False otherwise """ @@ -265,3 +275,4 @@ def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08): raise ValueError('the prevalence vector does not sum up to 1') return False return True + diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 87b682e..a9a93cb 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -76,7 +76,7 @@ class AggregativeQuantifier(BaseQuantifier): by the classifier. :param instances: array-like - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. """ classif_predictions = self.classify(instances) return self.aggregate(classif_predictions) @@ -87,7 +87,7 @@ class AggregativeQuantifier(BaseQuantifier): Implements the aggregation of label predictions. :param classif_predictions: `np.ndarray` of label predictions - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. """ ...
@@ -113,19 +113,6 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): def classify(self, instances): return self.classifier.predict_proba(instances) - # def set_params(self, **parameters): - # if isinstance(self.classifier, CalibratedClassifierCV): - # if self.classifier.get_params().get('base_estimator') == 'deprecated': - # key_prefix = 'estimator__' # this has changed in the newer versions of sklearn - # else: - # key_prefix = 'base_estimator__' - # parameters = {key_prefix + k: v for k, v in parameters.items()} - # elif isinstance(self.classifier, RecalibratedClassifier): - # parameters = {'estimator__' + k: v for k, v in parameters.items()} - # - # self.classifier.set_params(**parameters) - # return self - # Helper # ------------------------------------ @@ -198,7 +185,7 @@ def cross_generate_predictions( n_jobs ): - n_jobs = qp.get_njobs(n_jobs) + n_jobs = qp._get_njobs(n_jobs) if isinstance(val_split, int): assert fit_classifier == True, \ @@ -305,7 +292,7 @@ class CC(AggregativeQuantifier): Computes class prevalence estimates by counting the prevalence of each of the predicted labels. :param classif_predictions: array-like with label predictions - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. 
""" return F.prevalence_from_labels(classif_predictions, self.classes_) @@ -328,7 +315,7 @@ class ACC(AggregativeQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -435,7 +422,7 @@ class PACC(AggregativeProbabilisticQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -660,6 +647,20 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) +def _get_divergence(divergence: Union[str, Callable]): + if isinstance(divergence, str): + if divergence=='HD': + return F.HellingerDistance + elif divergence=='topsoe': + return F.TopsoeDistance + else: + raise ValueError(f'unknown divergence {divergence}') + elif callable(divergence): + return divergence + else: + raise ValueError(f'argument "divergence" not understood; use a str or a callable function') + + class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): """ `DyS framework `_ (DyS). 
@@ -765,25 +766,13 @@ class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -def _get_divergence(divergence: Union[str, Callable]): - if isinstance(divergence, str): - if divergence=='HD': - return F.HellingerDistance - elif divergence=='topsoe': - return F.TopsoeDistance - else: - raise ValueError(f'unknown divergence {divergence}') - elif callable(divergence): - return divergence - else: - raise ValueError(f'argument "divergence" not understood; use a str or a callable function') class DistributionMatching(AggregativeProbabilisticQuantifier): """ Generic Distribution Matching quantifier for binary or multiclass quantification. This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters. - :param classifier: a sklearn's Estimator that generates a probabilistic classifier + :param classifier: a `sklearn`'s Estimator that generates a probabilistic classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the validation distribution. 
This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -799,7 +788,6 @@ class DistributionMatching(AggregativeProbabilisticQuantifier): """ def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD', cdf=False, n_jobs=None): - self.classifier = classifier self.val_split = val_split self.nbins = nbins @@ -1020,7 +1008,7 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): self._check_binary(data, "Threshold Optimization") @@ -1277,7 +1265,7 @@ class OneVsAll(AggregativeQuantifier): assert isinstance(self.binary_quantifier, AggregativeQuantifier), \ f'{self.binary_quantifier} does not seem to be of type Aggregative' self.binary_quantifier = binary_quantifier - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True): assert not data.binary, \ diff --git a/quapy/method/base.py b/quapy/method/base.py index 459130c..a80f7b7 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -32,29 +32,10 @@ class BaseQuantifier(BaseEstimator): Generate class prevalence estimates for the sample's instances :param instances: array-like - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. """ ... - # @abstractmethod - # def set_params(self, **parameters): - # """ - # Set the parameters of the quantifier. - # - # :param parameters: dictionary of param-value pairs - # """ - # ... 
- # - # @abstractmethod - # def get_params(self, deep=True): - # """ - # Return the current parameters of the quantifier. - # - # :param deep: for compatibility with sklearn - # :return: a dictionary of param-value pairs - # """ - # ... - class BinaryQuantifier(BaseQuantifier): """ @@ -77,7 +58,7 @@ class OneVsAllGeneric: assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' self.binary_quantifier = binary_quantifier - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, **kwargs): assert not data.binary, \ diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 82d3a35..ba682ee 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -84,7 +84,7 @@ class Ensemble(BaseQuantifier): self.red_size = red_size self.policy = policy self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) self.post_proba_fn = None self.verbose = verbose self.max_sample_size = max_sample_size @@ -147,7 +147,7 @@ class Ensemble(BaseQuantifier): with the abstract class). Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for - classification (not recommended). + classification (not recommended). :param parameters: dictionary :return: raises an Exception @@ -163,10 +163,12 @@ class Ensemble(BaseQuantifier): with the abstract class). Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for - classification (not recommended). + classification (not recommended). 
+ :param deep: for compatibility with scikit-learn :return: raises an Exception """ + raise NotImplementedError() def _accuracy_policy(self, error_name): diff --git a/quapy/method/non_aggregative.py b/quapy/method/non_aggregative.py index f70a0c6..0a8680d 100644 --- a/quapy/method/non_aggregative.py +++ b/quapy/method/non_aggregative.py @@ -21,7 +21,6 @@ class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier): :param data: the training sample :return: self """ - self._classes_ = data.classes_ self.estimated_prevalence = data.prevalence() return self @@ -34,29 +33,3 @@ class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier): """ return self.estimated_prevalence - @property - def classes_(self): - """ - Number of classes - - :return: integer - """ - - return self._classes_ - - def get_params(self, deep=True): - """ - Does nothing, since this learner has no parameters. - - :param deep: for compatibility with sklearn - :return: `None` - """ - return None - - def set_params(self, **parameters): - """ - Does nothing, since this learner has no parameters. - - :param parameters: dictionary of param-value pairs (ignored) - """ - pass diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 3cb22c7..b8b9282 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -49,7 +49,7 @@ class GridSearchQ(BaseQuantifier): self.protocol = protocol self.refit = refit self.timeout = timeout - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) self.verbose = verbose self.__check_error(error) assert isinstance(protocol, AbstractProtocol), 'unknown protocol' diff --git a/quapy/protocol.py b/quapy/protocol.py index b30165f..1dec78b 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -11,13 +11,17 @@ from glob import glob class AbstractProtocol(metaclass=ABCMeta): + """ + Abstract parent class for sample generation protocols. + """ @abstractmethod def __call__(self): """ - Implements the protocol. 
Yields one sample at a time + Implements the protocol. Yields one sample at a time along with its prevalence - :return: yields one sample at a time + :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances + and in which `prev` is an `np.ndarray` with the class prevalence values """ ... @@ -32,9 +36,10 @@ class AbstractProtocol(metaclass=ABCMeta): class AbstractStochasticSeededProtocol(AbstractProtocol): """ - An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g., - via random sapling), sequences of `LabelledCollection` samples. The protocol abstraction enforces - the object to be instantiated using a seed, so that the sequence can be completely replicated. + An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g., + via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples. + The protocol abstraction enforces + the object to be instantiated using a seed, so that the sequence can be fully replicated. In order to make this functionality possible, the classes extending this abstraction need to implement only two functions, :meth:`samples_parameters` which generates all the parameters needed for extracting the samples, and :meth:`sample` that, given some parameters as input, @@ -128,7 +133,8 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): combination of prevalence values is indicated by `repeats`. :param data: a `LabelledCollection` from which the samples will be drawn - :param sample_size: integer, number of instances in each sample + :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
:param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid (default is 21) :param repeats: number of copies for each valid prevalence vector (default is 10) @@ -138,10 +144,11 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'): + def __init__(self, data:LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, + smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'): super(APP, self).__init__(random_state) self.data = data - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.n_prevalences = n_prevalences self.repeats = repeats self.smooth_limits_epsilon = smooth_limits_epsilon @@ -191,17 +198,18 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. :param data: a `LabelledCollection` from which the samples will be drawn - :param sample_size: integer, the number of instances in each sample + :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. :param repeats: the number of samples to generate. Default is 100. 
:param random_state: allows replicating samples across runs (default None) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): + def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev'): super(NPP, self).__init__(random_state) self.data = data - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) @@ -230,17 +238,19 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) combinations of the grid values of APP makes this endeavour intractable. :param data: a `LabelledCollection` from which the samples will be drawn - :param sample_size: integer, the number of instances in each sample + :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. :param repeats: the number of samples to generate. Default is 100. 
:param random_state: allows replicating samples across runs (default None) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): + def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=None, + return_type='sample_prev'): super(USimplexPP, self).__init__(random_state) self.data = data - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) @@ -259,32 +269,7 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) return self.repeats -# class LoadSamplesFromDirectory(AbstractProtocol): -# -# def __init__(self, folder_path, loader_fn, classes=None, **loader_kwargs): -# assert exists(folder_path), f'folder {folder_path} does not exist' -# assert callable(loader_fn), f'the passed load_fn does not seem to be callable' -# self.folder_path = folder_path -# self.loader_fn = loader_fn -# self.classes = classes -# self.loader_kwargs = loader_kwargs -# self._list_files = None -# -# def __call__(self): -# for file in self.list_files: -# yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs) -# -# @property -# def list_files(self): -# if self._list_files is None: -# self._list_files = sorted(glob(self.folder_path, '*')) -# return self._list_files -# -# def total(self): -# return len(self.list_files) - - -class CovariateShiftPP(AbstractStochasticSeededProtocol): +class DomainMixer(AbstractStochasticSeededProtocol): """ Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. 
@@ -311,10 +296,10 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): mixture_points=11, random_state=None, return_type='sample_prev'): - super(CovariateShiftPP, self).__init__(random_state) + super(DomainMixer, self).__init__(random_state) self.A = domainA self.B = domainB - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats if prevalence is None: self.prevalence = domainA.prevalence() diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py index bcf721c..f13907c 100644 --- a/quapy/tests/test_methods.py +++ b/quapy/tests/test_methods.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC import quapy as qp +from quapy.method.base import BinaryQuantifier from quapy.data import Dataset, LabelledCollection from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS from quapy.method.aggregative import ACC, PACC, HDy @@ -21,7 +22,7 @@ learners = [LogisticRegression, LinearSVC] def test_aggregative_methods(dataset: Dataset, aggregative_method, learner): model = aggregative_method(learner()) - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {type(model)} on non-binary dataset {dataset}') return @@ -45,7 +46,7 @@ def test_elm_methods(dataset: Dataset, elm_method): print('Missing SVMperf binary program, skipping test') return - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -64,7 +65,7 @@ def test_elm_methods(dataset: Dataset, elm_method): def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): model = non_aggregative_method() - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: 
print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -85,7 +86,7 @@ def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): def test_ensemble_method(base_method, learner, dataset: Dataset, policy): qp.environ['SAMPLE_SIZE'] = len(dataset.training) model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1) - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -120,7 +121,7 @@ def test_quanet_method(): from quapy.method.meta import QuaNet model = QuaNet(learner, sample_size=len(dataset.training), device='cuda') - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -138,7 +139,7 @@ def models_to_test_for_str_label_names(): models = list() learner = LogisticRegression for method in AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS): - models.append(method(learner())) + models.append(method(learner(random_state=0))) for method in NON_AGGREGATIVE_METHODS: models.append(method()) return models @@ -156,6 +157,7 @@ def test_str_label_names(model): dataset.test.sampling(1000, *dataset.test.prevalence())) qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + numpy.random.seed(0) model.fit(dataset.training) int_estim_prevalences = model.quantify(dataset.test.instances) @@ -168,7 +170,8 @@ def test_str_label_names(model): ['one' if label == 1 else 'zero' for label in dataset.training.labels]), LabelledCollection(dataset.test.instances, ['one' if label == 1 else 'zero' for label in dataset.test.labels])) - + assert all(dataset_str.training.classes_ == dataset_str.test.classes_), 'wrong indexation' + numpy.random.seed(0) model.fit(dataset_str.training) str_estim_prevalences = 
model.quantify(dataset_str.test.instances) diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py index d54dcbe..180f680 100644 --- a/quapy/tests/test_modsel.py +++ b/quapy/tests/test_modsel.py @@ -5,9 +5,9 @@ from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC import quapy as qp -from method.aggregative import PACC -from model_selection import GridSearchQ -from protocol import APP +from quapy.method.aggregative import PACC +from quapy.model_selection import GridSearchQ +from quapy.protocol import APP import time @@ -20,7 +20,7 @@ class ModselTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) training, validation = data.training.split_stratified(0.7, random_state=1) - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True @@ -28,8 +28,8 @@ class ModselTestCase(unittest.TestCase): print('best params', q.best_params_) print('best score', q.best_score_) - self.assertEqual(q.best_params_['C'], 10.0) - self.assertEqual(q.best_model().get_params()['C'], 10.0) + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0) def test_modsel_parallel(self): @@ -39,7 +39,7 @@ class ModselTestCase(unittest.TestCase): training, validation = data.training.split_stratified(0.7, random_state=1) # test = data.test - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True @@ -47,8 +47,8 @@ class ModselTestCase(unittest.TestCase): print('best params', q.best_params_) print('best score', q.best_score_) - self.assertEqual(q.best_params_['C'], 10.0) - 
self.assertEqual(q.best_model().get_params()['C'], 10.0) + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0) def test_modsel_parallel_speedup(self): class SlowLR(LogisticRegression): @@ -61,7 +61,7 @@ class ModselTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) training, validation = data.training.split_stratified(0.7, random_state=1) - param_grid = {'C': np.logspace(-3, 3, 7)} + param_grid = {'classifier__C': np.logspace(-3, 3, 7)} app = APP(validation, sample_size=100, random_state=1) tinit = time.time() @@ -95,7 +95,7 @@ class ModselTestCase(unittest.TestCase): training, validation = data.training.split_stratified(0.7, random_state=1) # test = data.test - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=3, n_jobs=-1, verbose=True diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index dea3290..1510fee 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -1,7 +1,7 @@ import unittest import numpy as np from quapy.data import LabelledCollection -from quapy.protocol import APP, NPP, USimplexPP, CovariateShiftPP, AbstractStochasticSeededProtocol +from quapy.protocol import APP, NPP, USimplexPP, DomainMixer, AbstractStochasticSeededProtocol def mock_labelled_collection(prefix=''): @@ -94,7 +94,7 @@ class TestProtocols(unittest.TestCase): def test_covariate_shift_replicate(self): dataA = mock_labelled_collection('domA') dataB = mock_labelled_collection('domB') - p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_state=1) + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11, random_state=1) samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -104,7 +104,7 @@ class 
TestProtocols(unittest.TestCase): def test_covariate_shift_not_replicate(self): dataA = mock_labelled_collection('domA') dataB = mock_labelled_collection('domB') - p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11) + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11) samples1 = samples_to_str(p) samples2 = samples_to_str(p) diff --git a/quapy/util.py b/quapy/util.py index 50a640d..6f8543d 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -22,7 +22,7 @@ def _get_parallel_slices(n_tasks, n_jobs): def map_parallel(func, args, n_jobs): """ - Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then + Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and `n_jobs`=2, then func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function that already works with a list of arguments. @@ -128,6 +128,7 @@ def create_if_not_exist(path): def get_quapy_home(): """ Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as dowloaded datasets. + This directory is `~/quapy_data` :return: a string representing the path """ @@ -162,7 +163,7 @@ def save_text_file(path, text): def pickled_resource(pickle_path:str, generation_func:callable, *args): """ - Allows for fast reuse of resources that are generated only once by calling generation_func(*args). The next times + Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times this function is invoked, it loads the pickled resource. Example: >>> def some_array(n): # a mock resource created with one parameter (`n`) @@ -191,10 +192,6 @@ class EarlyStop: """ A class implementing the early-stopping condition typically used for training neural networks. 
- :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtaind in a - held-out validation split) can be found to be worse than the best one obtained so far, before flagging the - stopping condition. An instance of this class is `callable`, and is to be used as follows: - >>> earlystop = EarlyStop(patience=2, lower_is_better=True) >>> earlystop(0.9, epoch=0) >>> earlystop(0.7, epoch=1) @@ -206,14 +203,14 @@ class EarlyStop: >>> earlystop.best_epoch # is 1 >>> earlystop.best_score # is 0.7 - + :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a + held-out validation split) can be found to be worse than the best one obtained so far, before flagging the + stopping condition. An instance of this class is `callable`, and is to be used as shown in the example above. :param lower_is_better: if True (default) the metric is to be minimized. - :ivar best_score: keeps track of the best value seen so far :ivar best_epoch: keeps track of the epoch in which the best score was set :ivar STOP: flag (boolean) indicating the stopping condition :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call - """ def __init__(self, patience, lower_is_better=True): @@ -243,4 +240,5 @@ class EarlyStop: else: self.patience -= 1 if self.patience <= 0: - self.STOP = True \ No newline at end of file + self.STOP = True +