diff --git a/LeQua2022/baselinesSVD_T1A.py b/LeQua2022/baselinesSVD_T1A.py
deleted file mode 100644
index 576a7ee..0000000
--- a/LeQua2022/baselinesSVD_T1A.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-from sklearn.decomposition import TruncatedSVD
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-svd = TruncatedSVD(n_components=300)
-train.instances = svd.fit_transform(train.instances)
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
-
-    # classifier = CalibratedClassifierCV(LogisticRegression())
-    classifier = LogisticRegression()
-    model = quantifier(classifier).fit(train)
-    quantifier_name = model.__class__.__name__
-
-    predictions = ResultSubmission(categories=['negative', 'positive'])
-    for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
-                                   desc=quantifier_name, total=len(true_prevalence)):
-        sample = svd.transform(sample)
-        predictions.add(samplename, model.quantify(sample))
-
-    predictions.dump(os.path.join(predictions_path, quantifier_name + '.svd.csv'))
-    pickle.dump(model, open(os.path.join(models_path, quantifier_name+'.svd.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
-
-    mae, mrae = evaluate_submission(true_prevalence, predictions)
-    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1.py b/LeQua2022/baselines_T1.py
new file mode 100644
index 0000000..3a52361
--- /dev/null
+++ b/LeQua2022/baselines_T1.py
@@ -0,0 +1,91 @@
+import argparse
+import pickle
+from sklearn.linear_model import LogisticRegression as LR
+from quapy.method.aggregative import *
+import quapy.functional as F
+from data import *
+import os
+import constants
+
+
+# LeQua official baselines for tasks T1A (binary) and T1B (multiclass), vector mode
+# =================================================================================
+
+def baselines():
+    yield CC(LR(n_jobs=-1)), "CC"
+    yield ACC(LR(n_jobs=-1)), "ACC"
+    yield PCC(LR(n_jobs=-1)), "PCC"
+    yield PACC(LR(n_jobs=-1)), "PACC"
+    yield EMQ(CalibratedClassifierCV(LR(), n_jobs=-1)), "SLD"
+    yield HDy(LR(n_jobs=-1)) if args.task == 'T1A' else OneVsAll(HDy(LR()), n_jobs=-1), "HDy"
+
+
+def main(args):
+
+    models_path = qp.util.create_if_not_exist(os.path.join(args.modeldir, args.task))
+
+    path_dev_vectors = os.path.join(args.datadir, 'dev_vectors')
+    path_dev_prevs = os.path.join(args.datadir, 'dev_prevalences.csv')
+    path_train = os.path.join(args.datadir, 'training_vectors.txt')
+
+    qp.environ['SAMPLE_SIZE'] = constants.SAMPLE_SIZE[args.task]
+
+    train = LabelledCollection.load(path_train, load_binary_vectors)
+    nF = train.instances.shape[1]
+
+    print(f'number of classes: {len(train.classes_)}')
+    print(f'number of training documents: {len(train)}')
+    print(f'training prevalence: {F.strprev(train.prevalence())}')
+    print(f'training matrix shape: {train.instances.shape}')
+
+    param_grid = {
+        'C': np.logspace(-3,3,7),
+        'class_weight': ['balanced', None]
+    }
+
+    def gen_samples():
+        return gen_load_samples_T1(path_dev_vectors, nF, ground_truth_path=path_dev_prevs, return_id=False)
+
+    for quantifier, q_name in baselines():
+        print(f'{q_name}: Model selection')
+        quantifier = qp.model_selection.GridSearchQ(
+            quantifier,
+            param_grid,
+            sample_size=None,
+            protocol='gen',
+            error=qp.error.mae,
+            refit=False,
+            verbose=True
+        ).fit(train, gen_samples)
+
+        print(f'{q_name} got MAE={quantifier.best_score_:.3f} (hyper-params: {quantifier.best_params_})')
+
+        model_path = os.path.join(models_path, q_name+'.pkl')
+        print(f'saving model in {model_path}')
+        pickle.dump(quantifier.best_model(), open(model_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='LeQua2022 Task T1A/T1B baselines')
+    parser.add_argument('task', metavar='TASK', type=str, choices=['T1A', 'T1B'],
+                        help='Task name (T1A, T1B)')
+    parser.add_argument('datadir', metavar='DATA-PATH', type=str,
+                        help='Path of the directory containing "dev_prevalences.csv", "training_vectors.txt", and '
+                             'the directory "dev_vectors"')
+    parser.add_argument('modeldir', metavar='MODEL-PATH', type=str,
+                        help='Path where to save the models. '
+                             'A subdirectory named after the task will be automatically created.')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.datadir):
+        raise FileNotFoundError(f'path {args.datadir} does not exist')
+    if not os.path.isdir(args.datadir):
+        raise ValueError(f'path {args.datadir} is not a valid directory')
+    if not os.path.exists(os.path.join(args.datadir, "dev_prevalences.csv")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "dev_prevalences.csv" file')
+    if not os.path.exists(os.path.join(args.datadir, "training_vectors.txt")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "training_vectors.txt" file')
+    if not os.path.exists(os.path.join(args.datadir, "dev_vectors")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "dev_vectors" folder')
+
+    main(args)
diff --git a/LeQua2022/baselines_T1A.py b/LeQua2022/baselines_T1A.py
deleted file mode 100644
index 48d42d4..0000000
--- a/LeQua2022/baselines_T1A.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
-
-    # classifier = CalibratedClassifierCV(LogisticRegression(C=1))
-    classifier = LogisticRegression(C=1)
-    model = quantifier(classifier).fit(train)
-    quantifier_name = model.__class__.__name__
-
-    predictions = ResultSubmission(categories=['negative', 'positive'])
-    for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
-                                   desc=quantifier_name, total=len(true_prevalence)):
-        predictions.add(samplename, model.quantify(sample))
-
-    predictions.dump(os.path.join(predictions_path, quantifier_name + '.csv'))
-    pickle.dump(model, open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
-
-    mae, mrae = evaluate_submission(true_prevalence, predictions)
-    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1Amodsel.py b/LeQua2022/baselines_T1Amodsel.py
deleted file mode 100644
index 12d658a..0000000
--- a/LeQua2022/baselines_T1Amodsel.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import pickle
-from sklearn.linear_model import LogisticRegression
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-param_grid = {
-    'C': np.logspace(-3,3,7),
-    'class_weight': ['balanced', None]
-}
-
-
-def gen_samples():
-    return gen_load_samples_T1(T1A_devvectors_path, nF, ground_truth_path=T1A_devprevalence_path, return_id=False)
-
-
-for quantifier in [EMQ]:  # [CC, ACC, PCC, PACC, EMQ, HDy]:
-    if quantifier == EMQ:
-        classifier = CalibratedClassifierCV(LogisticRegression(), n_jobs=-1)
-    else:
-        classifier = LogisticRegression()
-    model = quantifier(classifier)
-    print(f'{model.__class__.__name__}: Model selection')
-    model = qp.model_selection.GridSearchQ(
-        model,
-        param_grid,
-        sample_size=None,
-        protocol='gen',
-        error=qp.error.mae,
-        refit=False,
-        verbose=True
-    ).fit(train, gen_samples)
-
-    quantifier_name = model.best_model().__class__.__name__
-    print(f'{quantifier_name} mae={model.best_score_:.3f} (params: {model.best_params_})')
-
-    pickle.dump(model.best_model(),
-                open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb'),
-                protocol=pickle.HIGHEST_PROTOCOL)
-
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1B.py b/LeQua2022/baselines_T1B.py
deleted file mode 100644
index 1344bbc..0000000
--- a/LeQua2022/baselines_T1B.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-predictions_path = os.path.join('predictions', 'T1B')  # multiclass - vector
-os.makedirs(predictions_path, exist_ok=True)
-
-pathT1B = './data/T1B/public'
-T1B_devvectors_path = os.path.join(pathT1B, 'dev_vectors')
-T1B_devprevalence_path = os.path.join(pathT1B, 'dev_prevalences.csv')
-T1B_trainpath = os.path.join(pathT1B, 'training_vectors.txt')
-T1B_catmap = os.path.join(pathT1B, 'training_vectors_label_map.txt')
-
-train = LabelledCollection.load(T1B_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1B_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1B_devprevalence_path)
-
-cat2code, categories = load_category_map(T1B_catmap)
-
-for quantifier in [PACC]:  # [CC, ACC, PCC, PACC, EMQ]:
-
-    classifier = CalibratedClassifierCV(LogisticRegression())
-    model = quantifier(classifier).fit(train)
-    quantifier_name = model.__class__.__name__
-
-    predictions = ResultSubmission(categories=categories)
-    for samplename, sample in tqdm(gen_load_samples_T1(T1B_devvectors_path, nF),
-                                   desc=quantifier_name, total=len(true_prevalence)):
-        predictions.add(samplename, model.quantify(sample))
-
-    predictions.dump(os.path.join(predictions_path, quantifier_name + '.csv'))
-    mae, mrae = evaluate_submission(true_prevalence, predictions)
-    print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-
-
diff --git a/LeQua2022/constants.py b/LeQua2022/constants.py
index 11a78ce..7036eff 100644
--- a/LeQua2022/constants.py
+++ b/LeQua2022/constants.py
@@ -9,4 +9,11 @@ T1B_SAMPLE_SIZE = 1000
 T2A_SAMPLE_SIZE = 250
 T2B_SAMPLE_SIZE = 1000
 
+SAMPLE_SIZE={
+    'T1A': T1A_SAMPLE_SIZE,
+    'T1B': T1B_SAMPLE_SIZE,
+    'T2A': T2A_SAMPLE_SIZE,
+    'T2B': T2B_SAMPLE_SIZE
+}
+
 ERROR_TOL = 1E-3
diff --git a/LeQua2022/data.py b/LeQua2022/data.py
index bcea49f..e4a1095 100644
--- a/LeQua2022/data.py
+++ b/LeQua2022/data.py
@@ -34,27 +34,23 @@ def load_category_map(path):
 
 
 def load_binary_vectors(path, nF=None):
-    return sklearn.datasets.load_svmlight_file(path, n_features=nF)
+    X, y = sklearn.datasets.load_svmlight_file(path, n_features=nF)
+    y = y.astype(int)
+    return X, y
 
 
 def __gen_load_samples_with_groudtruth(path_dir:str, return_id:bool, ground_truth_path:str, load_fn, **load_kwargs):
     true_prevs = ResultSubmission.load(ground_truth_path)
     for id, prevalence in true_prevs.iterrows():
         sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
-        if return_id:
-            yield id, sample, prevalence
-        else:
-            yield sample, prevalence
+        yield (id, sample, prevalence) if return_id else (sample, prevalence)
 
 
 def __gen_load_samples_without_groudtruth(path_dir:str, return_id:bool, load_fn, **load_kwargs):
     nsamples = len(glob(os.path.join(path_dir, '*.txt')))
     for id in range(nsamples):
         sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
-        if return_id:
-            yield id, sample
-        else:
-            yield sample
+        yield (id, sample) if return_id else sample
 
 
 def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
@@ -68,6 +64,17 @@ def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, retu
         yield r
 
 
+def genSVD_load_samples_T1(load_fn, path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
+    if ground_truth_path is None:
+        # the generator function returns tuples (filename:str, sample:csr_matrix)
+        gen_fn = __gen_load_samples_without_groudtruth(path_dir, return_id, load_fn, nF=nF)
+    else:
+        # the generator function returns tuples (filename:str, sample:csr_matrix, prevalence:ndarray)
+        gen_fn = __gen_load_samples_with_groudtruth(path_dir, return_id, ground_truth_path, load_fn, nF=nF)
+    for r in gen_fn:
+        yield r
+
+
 def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None):
     # for ... : yield
     pass
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
index 18a0623..608daab 100644
--- a/docs/build/html/_sources/index.rst.txt
+++ b/docs/build/html/_sources/index.rst.txt
@@ -78,7 +78,7 @@ Features
    Methods
    Model Selection
    Plotting
-   API Developer documentation
+   API Developers documentation
 
 
diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html
index d1eb190..5d2c96f 100644
--- a/docs/build/html/genindex.html
+++ b/docs/build/html/genindex.html
@@ -230,8 +230,6 @@
  • compute_table() (quapy.method.aggregative.ThresholdOptimization method)
  • compute_tpr() (quapy.method.aggregative.ThresholdOptimization method) -
  • -
  • conv_block() (quapy.classification.neural.CNNnet method)
  • counts() (quapy.data.base.LabelledCollection method)
  • @@ -337,7 +335,7 @@
  • fetch_UCILabelledCollection() (in module quapy.data.datasets)
  • -
  • fit() (quapy.classification.methods.PCALR method) +
  • fit() (quapy.classification.methods.LowRankLogisticRegression method) - + @@ -673,8 +669,6 @@
  • PACC (class in quapy.method.aggregative)
  • parallel() (in module quapy.util) -
  • -
  • PCALR (class in quapy.classification.methods)
  • PCC (class in quapy.method.aggregative)
  • @@ -686,7 +680,7 @@
  • (quapy.method.aggregative.OneVsAll method)
  • -
  • predict() (quapy.classification.methods.PCALR method) +
  • predict() (quapy.classification.methods.LowRankLogisticRegression method)
  • -
  • predict_proba() (quapy.classification.methods.PCALR method) +
  • predict_proba() (quapy.classification.methods.LowRankLogisticRegression method)
    • (quapy.classification.neural.NeuralClassifierTrainer method) @@ -952,7 +946,7 @@
    • se() (in module quapy.error)
    • -
    • set_params() (quapy.classification.methods.PCALR method) +
    • set_params() (quapy.classification.methods.LowRankLogisticRegression method)
      • (quapy.classification.neural.NeuralClassifierTrainer method) @@ -1032,7 +1026,7 @@
      • training_helper() (in module quapy.method.aggregative)
      • -
      • transform() (quapy.classification.methods.PCALR method) +
      • transform() (quapy.classification.methods.LowRankLogisticRegression method)
      • -
      • API Developer documentation
          +
        • API Developers documentation
        • diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv index 76b8d31..318d517 100644 Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ diff --git a/docs/build/html/quapy.classification.html b/docs/build/html/quapy.classification.html index a5a1ace..34fe110 100644 --- a/docs/build/html/quapy.classification.html +++ b/docs/build/html/quapy.classification.html @@ -59,69 +59,108 @@

          quapy.classification.methods module

          -
          -class quapy.classification.methods.PCALR(n_components=100, **kwargs)
          +
          +class quapy.classification.methods.LowRankLogisticRegression(n_components=100, **kwargs)

          Bases: sklearn.base.BaseEstimator

          -

          An example of a classification method that also generates embedded inputs, as those required for QuaNet. -This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).

          -
          -
          -fit(X, y)
          -
          - -
          -
          -get_params()
          -

          Get parameters for this estimator.

          +

An example of a classification method (i.e., an object that implements fit, predict, and predict_proba) +that also generates embedded inputs (i.e., that implements transform), as those required for +quapy.method.neural.QuaNet. This is a mock method to allow for easily instantiating +quapy.method.neural.QuaNet on array-like real-valued instances. +The transformation consists of applying sklearn.decomposition.TruncatedSVD +while classification is performed using sklearn.linear_model.LogisticRegression on the low-rank space.

          Parameters
          -

          deep (bool, default=True) – If True, will return the parameters for this estimator and -contained subobjects that are estimators.

          +
            +
          • n_components – the number of principal components to retain

          • +
          • kwargs – parameters for the +Logistic Regression classifier

          • +
          +
          +
          +
          +
          +fit(X, y)
          +

          Fit the model according to the given training data. The fit consists of +fitting TruncatedSVD and Logistic Regression.

          +
          +
          Parameters
          +
            +
          • X – array-like of shape (n_samples, n_features) with the instances

          • +
• y – array-like of shape (n_samples,) with the class labels

          • +
          Returns
          -

          params – Parameter names mapped to their values.

          -
          -
          Return type
          -

          dict

          +

          self

          -
          -predict(X)
          -
          +
          +get_params()
          +

          Get hyper-parameters for this estimator

          +
          +
          Returns
          +

          a dictionary with parameter names mapped to their values

          +
          +
          +
          -
          -predict_proba(X)
          -
          +
          +predict(X)
          +

          Predicts labels for the instances X

          +
          +
          Parameters
          +

          X – array-like of shape (n_samples, n_features) instances to classify

          +
          +
          Returns
          +

          a numpy array of length n containing the label predictions, where n is the number of +instances in X

          +
          +
          +
          -
          -set_params(**params)
          +
          +predict_proba(X)
          +

          Predicts posterior probabilities for the instances X

          +
          +
          Parameters
          +

          X – array-like of shape (n_samples, n_features) instances to classify

          +
          +
          Returns
          +

          array-like of shape (n_samples, n_classes) with the posterior probabilities

          +
          +
          +
          + +
          +
          +set_params(**params)

          Set the parameters of this estimator.

          -

          The method works on simple estimators as well as on nested objects -(such as Pipeline). The latter have -parameters of the form <component>__<parameter> so that it’s -possible to update each component of a nested object.

          Parameters
          -

          **params (dict) – Estimator parameters.

          -
          -
          Returns
          -

          self – Estimator instance.

          -
          -
          Return type
          -

          estimator instance

          +

parameters – a **kwargs dictionary with the estimator parameters for +Logistic Regression, +and optionally also n_components for the truncated SVD

          -
          -transform(X)
          -
          +
          +transform(X)
          +

          Returns the low-rank approximation of X with n_components dimensions

          +
          +
          Parameters
          +

          X – array-like of shape (n_samples, n_features) instances to embed

          +
          +
          Returns
          +

          array-like of shape (n_samples, n_components) with the embedded instances

          +
          +
          +
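A minimal usage sketch of LowRankLogisticRegression (the data names and the value n_components=64 are illustrative placeholders, not part of the documented API):

    from quapy.classification.methods import LowRankLogisticRegression

    cls = LowRankLogisticRegression(n_components=64, C=1.0)  # extra kwargs are passed to LogisticRegression
    cls.fit(X_train, y_train)                  # fits TruncatedSVD, then LR on the low-rank space
    posteriors = cls.predict_proba(X_test)     # shape (n_samples, n_classes)
    embedded = cls.transform(X_test)           # shape (n_samples, n_components)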
          @@ -132,25 +171,63 @@ possible to update each component of a nested object.

          class quapy.classification.neural.CNNnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5)

          Bases: quapy.classification.neural.TextClassifierNet

          -
          -
          -conv_block(input, conv_layer)
          -
          - +

          An implementation of quapy.classification.neural.TextClassifierNet based on +Convolutional Neural Networks.

          +
          +
          Parameters
          +
            +
          • vocabulary_size – the size of the vocabulary

          • +
          • n_classes – number of target classes

          • +
          • embedding_size – the dimensionality of the word embeddings space (default 100)

          • +
          • hidden_size – the dimensionality of the hidden space (default 256)

          • +
          • repr_size – the dimensionality of the document embeddings space (default 100)

          • +
          • kernel_heights – list of kernel lengths (default [3,5,7]), i.e., the number of +consecutive tokens that each kernel covers

          • +
          • stride – convolutional stride (default 1)

          • +
• padding – convolutional padding (default 0)

          • +
          • drop_p – drop probability for dropout (default 0.5)

          • +
          +
          +
          document_embedding(input)
          -
          +

          Embeds documents (i.e., performs the forward pass up to the +next-to-last layer).

          +
          +
          Parameters
          +

          input – a batch of instances, typically generated by a torch’s DataLoader +instance (see quapy.classification.neural.TorchDataset)

          +
          +
          Returns
          +

          a torch tensor of shape (n_samples, n_dimensions), where +n_samples is the number of documents, and n_dimensions is the +dimensionality of the embedding

          +
          +
          +
          get_params()
          -
          +

          Get hyper-parameters for this estimator

          +
          +
          Returns
          +

          a dictionary with parameter names mapped to their values

          +
          +
          +
          property vocabulary_size
          -
          +

          Return the size of the vocabulary

          +
          +
          Returns
          +

          integer

          +
          +
          +
          @@ -158,25 +235,60 @@ possible to update each component of a nested object.

          class quapy.classification.neural.LSTMnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, drop_p=0.5)

          Bases: quapy.classification.neural.TextClassifierNet

          +

          An implementation of quapy.classification.neural.TextClassifierNet based on +Long Short Term Memory networks.

          +
          +
          Parameters
          +
            +
          • vocabulary_size – the size of the vocabulary

          • +
          • n_classes – number of target classes

          • +
          • embedding_size – the dimensionality of the word embeddings space (default 100)

          • +
          • hidden_size – the dimensionality of the hidden space (default 256)

          • +
          • repr_size – the dimensionality of the document embeddings space (default 100)

          • +
          • lstm_class_nlayers – number of LSTM layers (default 1)

          • +
          • drop_p – drop probability for dropout (default 0.5)

          • +
          +
          +
          document_embedding(x)
          -
          +

          Embeds documents (i.e., performs the forward pass up to the +next-to-last layer).

          +
          +
          Parameters
          +

          x – a batch of instances, typically generated by a torch’s DataLoader +instance (see quapy.classification.neural.TorchDataset)

          +
          +
          Returns
          +

          a torch tensor of shape (n_samples, n_dimensions), where +n_samples is the number of documents, and n_dimensions is the +dimensionality of the embedding

          +
          +
          +
          get_params()
          -
          - -
          -
          -init_hidden(set_size)
          -
          +

          Get hyper-parameters for this estimator

          +
          +
          Returns
          +

          a dictionary with parameter names mapped to their values

          +
          +
          +
          property vocabulary_size
          -
          +

          Return the size of the vocabulary

          +
          +
          Returns
          +

          integer

          +
          +
          +
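A minimal instantiation sketch for LSTMnet (the vocabulary size and number of classes are placeholders that would normally be derived from the indexed dataset):

    from quapy.classification.neural import LSTMnet

    net = LSTMnet(vocabulary_size=5000, n_classes=2,
                  embedding_size=100, hidden_size=256, repr_size=100,
                  lstm_class_nlayers=1, drop_p=0.5)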
          @@ -184,45 +296,135 @@ possible to update each component of a nested object.

          class quapy.classification.neural.NeuralClassifierTrainer(net: quapy.classification.neural.TextClassifierNet, lr=0.001, weight_decay=0, patience=10, epochs=200, batch_size=64, batch_size_test=512, padding_length=300, device='cpu', checkpointpath='../checkpoint/classifier_net.dat')

          Bases: object

          +

          Trains a neural network for text classification.

          +
          +
          Parameters
          +
            +
          • net – an instance of TextClassifierNet implementing the forward pass

          • +
          • lr – learning rate (default 1e-3)

          • +
          • weight_decay – weight decay (default 0)

          • +
          • patience – number of epochs that do not show any improvement in validation +to wait before applying early stop (default 10)

          • +
          • epochs – maximum number of training epochs (default 200)

          • +
          • batch_size – batch size for training (default 64)

          • +
          • batch_size_test – batch size for test (default 512)

          • +
          • padding_length – maximum number of tokens to consider in a document (default 300)

          • +
          • device – specify ‘cpu’ (default) or ‘cuda’ for enabling gpu

          • +
          • checkpointpath – where to store the parameters of the best model found so far +according to the evaluation in the held-out validation split (default ‘../checkpoint/classifier_net.dat’)

          • +
          +
          +
          property device
          -
          +

          Gets the device in which the network is allocated

          +
          +
          Returns
          +

          device

          +
          +
          +
          fit(instances, labels, val_split=0.3)
          -
          +

          Fits the model according to the given training data.

          +
          +
          Parameters
          +
            +
          • instances – list of lists of indexed tokens

          • +
• labels – array-like of shape (n_samples,) with the class labels

          • +
          • val_split – proportion of training documents to be taken as the validation set (default 0.3)

          • +
          +
          +
          Returns
          +

          +
          +
          +
          get_params()
          -
          +

          Get hyper-parameters for this estimator

          +
          +
          Returns
          +

          a dictionary with parameter names mapped to their values

          +
          +
          +
          predict(instances)
          -
          +

          Predicts labels for the instances

          +
          +
          Parameters
          +

          instances – list of lists of indexed tokens

          +
          +
          Returns
          +

          a numpy array of length n containing the label predictions, where n is the number of +instances in X

          +
          +
          +
          predict_proba(instances)
          -
          +

          Predicts posterior probabilities for the instances

          +
          +
          Parameters
          +

instances – list of lists of indexed tokens

          +
          +
          Returns
          +

          array-like of shape (n_samples, n_classes) with the posterior probabilities

          +
          +
          +
          reset_net_params(vocab_size, n_classes)
          -
          +

          Reinitialize the network parameters

          +
          +
          Parameters
          +
            +
          • vocab_size – the size of the vocabulary

          • +
          • n_classes – the number of target classes

          • +
          +
          +
          +
          set_params(**params)
          -
          +

Set the parameters of this trainer and the learner it is training. +In the current version, parameter names for the trainer and for the learner should +be disjoint.

          +
          +
          Parameters
          +

          params – a **kwargs dictionary with the parameters

          +
          +
          +
          transform(instances)
          -
          +

          Returns the embeddings of the instances

          +
          +
          Parameters
          +

          instances – list of lists of indexed tokens

          +
          +
          Returns
          +

          array-like of shape (n_samples, embed_size) with the embedded instances, +where embed_size is defined by the classification network

          +
          +
          +
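A minimal training sketch wiring a CNNnet into a NeuralClassifierTrainer (train_tokens and test_tokens are assumed to be lists of lists of indexed tokens, and train_labels the corresponding labels; all names and sizes are placeholders):

    from quapy.classification.neural import CNNnet, NeuralClassifierTrainer

    net = CNNnet(vocabulary_size=5000, n_classes=2)
    trainer = NeuralClassifierTrainer(net, lr=1e-3, device='cpu')
    trainer.fit(train_tokens, train_labels, val_split=0.3)
    predicted = trainer.predict(test_tokens)
    embeddings = trainer.transform(test_tokens)  # document embeddings, e.g., as inputs for QuaNet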
          @@ -230,49 +432,95 @@ possible to update each component of a nested object.

          class quapy.classification.neural.TextClassifierNet

          Bases: torch.nn.modules.module.Module

          +

          Abstract Text classifier (torch.nn.Module)

          dimensions()
          -
          +

          Gets the number of dimensions of the embedding space

          +
          +
          Returns
          +

          integer

          +
          +
          +
          abstract document_embedding(x)
          -
          +

          Embeds documents (i.e., performs the forward pass up to the +next-to-last layer).

          +
          +
          Parameters
          +

          x – a batch of instances, typically generated by a torch’s DataLoader +instance (see quapy.classification.neural.TorchDataset)

          +
          +
          Returns
          +

          a torch tensor of shape (n_samples, n_dimensions), where +n_samples is the number of documents, and n_dimensions is the +dimensionality of the embedding

          +
          +
          +
          forward(x)
          -

          Defines the computation performed at every call.

          -

          Should be overridden by all subclasses.

          -
          -

          Note

          -

          Although the recipe for forward pass needs to be defined within -this function, one should call the Module instance afterwards -instead of this since the former takes care of running the -registered hooks while the latter silently ignores them.

          -
          +

          Performs the forward pass.

          +
          +
          Parameters
          +

          x – a batch of instances, typically generated by a torch’s DataLoader +instance (see quapy.classification.neural.TorchDataset)

          +
          +
          Returns
          +

          a tensor of shape (n_instances, n_classes) with the decision scores +for each of the instances and classes

          +
          +
          abstract get_params()
          -
          +

          Get hyper-parameters for this estimator

          +
          +
          Returns
          +

          a dictionary with parameter names mapped to their values

          +
          +
          +
          predict_proba(x)
          -
          +

          Predicts posterior probabilities for the instances in x

          +
          +
          Parameters
          +

x – a torch tensor of indexed tokens with shape (n_instances, pad_length) +where n_instances is the number of instances in the batch, and pad_length +is the length of the pad in the batch

          +
          +
          Returns
          +

          array-like of shape (n_samples, n_classes) with the posterior probabilities

          +
          +
          +
          property vocabulary_size
          -
          +

          Return the size of the vocabulary

          +
          +
          Returns
          +

          integer

          +
          +
          +
          xavier_uniform()
          -
          +

          Performs Xavier initialization of the network parameters

          +
          @@ -280,10 +528,36 @@ registered hooks while the latter silently ignores them.

          class quapy.classification.neural.TorchDataset(instances, labels=None)

          Bases: torch.utils.data.dataset.Dataset

          +

          Transforms labelled instances into a Torch’s torch.utils.data.DataLoader object

          +
          +
          Parameters
          +
            +
          • instances – list of lists of indexed tokens

          • +
• labels – array-like of shape (n_samples,) with the class labels

          • +
          +
          +
          asDataloader(batch_size, shuffle, pad_length, device)
          -
          +

          Converts the labelled collection into a Torch DataLoader with dynamic padding for +the batch

          +
          +
          Parameters
          +
            +
          • batch_size – batch size

          • +
          • shuffle – whether or not to shuffle instances

          • +
• pad_length – the maximum length for the list of tokens (dynamic padding is +applied, meaning that if the longest document in the batch is shorter than +pad_length, then the batch is padded up to its length, and not to pad_length).

          • +
          • device – whether to allocate tensors in cpu or in cuda

          • +
          +
          +
          Returns
          +

          a torch.utils.data.DataLoader object

          +
          +
          +
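A minimal sketch of TorchDataset under the same placeholder names as above (batch composition follows torch's DataLoader conventions):

    from quapy.classification.neural import TorchDataset

    dataset = TorchDataset(train_tokens, train_labels)
    loader = dataset.asDataloader(batch_size=64, shuffle=True, pad_length=300, device='cpu')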
          @@ -294,38 +568,79 @@ registered hooks while the latter silently ignores them.

          class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01')

          Bases: sklearn.base.BaseEstimator, sklearn.base.ClassifierMixin

          +

          A wrapper for the SVM-perf package by Thorsten Joachims. +When using losses for quantification, the source code has to be patched. See +the installation documentation +for further details.

          +

          References:

          +
          +
          +
          +
          Parameters
          +
            +
          • svmperf_base – path to directory containing the binary files svm_perf_learn and svm_perf_classify

          • +
          • C – trade-off between training error and margin (default 0.01)

          • +
          • verbose – set to True to print svm-perf std outputs

          • +
          • loss – the loss to optimize for. Available losses are “01”, “f1”, “kld”, “nkld”, “q”, “qacc”, “qf1”, “qgm”, “mae”, “mrae”.

          • +
          +
          +
          decision_function(X, y=None)
          -
          +

          Evaluate the decision function for the samples in X.

          +
          +
          Parameters
          +
            +
          • X – array-like of shape (n_samples, n_features) containing the instances to classify

          • +
          • y – unused

          • +
          +
          +
          Returns
          +

          array-like of shape (n_samples,) containing the decision scores of the instances

          +
          +
          +
          fit(X, y)
          -
          +

          Trains the SVM for the multivariate performance loss

          +
          +
          Parameters
          +
            +
          • X – training instances

          • +
          • y – a binary vector of labels

          • +
          +
          +
          Returns
          +

          self

          +
          +
          +
          predict(X)
          -
          +

Predicts labels for the instances X

Parameters

X – array-like of shape (n_samples, n_features) instances to classify

Returns

a numpy array of length n containing the label predictions, where n is the number of instances in X

          +
          +
          set_params(**parameters)
          -

          Set the parameters of this estimator.

          -

          The method works on simple estimators as well as on nested objects -(such as Pipeline). The latter have -parameters of the form <component>__<parameter> so that it’s -possible to update each component of a nested object.

          +

          Set the hyper-parameters for svm-perf. Currently, only the C parameter is supported

          Parameters
          -

          **params (dict) – Estimator parameters.

          -
          -
          Returns
          -

          self – Estimator instance.

          -
          -
          Return type
          -

          estimator instance

          +

          parameters – a **kwargs dictionary {‘C’: <float>}

          diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index b400270..b8a80a1 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({docnames:["Datasets","Evaluation","Installation","Methods","Model-Selection","Plotting","index","modules","quapy","quapy.classification","quapy.data","quapy.method"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["Datasets.md","Evaluation.md","Installation.rst","Methods.md","Model-Selection.md","Plotting.md","index.rst","modules.rst","quapy.rst","quapy.classification.rst","quapy.data.rst","quapy.method.rst"],objects:{"":{quapy:[8,0,0,"-"]},"quapy.classification":{methods:[9,0,0,"-"],neural:[9,0,0,"-"],svmperf:[9,0,0,"-"]},"quapy.classification.methods":{PCALR:[9,1,1,""]},"quapy.classification.methods.PCALR":{fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural":{CNNnet:[9,1,1,""],LSTMnet:[9,1,1,""],NeuralClassifierTrainer:[9,1,1,""],TextClassifierNet:[9,1,1,""],TorchDataset:[9,1,1,""]},"quapy.classification.neural.CNNnet":{conv_block:[9,2,1,""],document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.LSTMnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],init_hidden:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.NeuralClassifierTrainer":{device:[9,3,1,""],fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],reset_net_params:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural.TextClassifierNet":{dimensions:[9,2,1,""],document_embedding:[9,2,1,""],forward:[9,2,1,""],get_params:[9,2,1,""],predict_proba:[9,2,1,""],vocabulary_size:[9,3,1,""],xavier_uniform:[9,2,1,""]},"quapy.classification.neural.TorchDataset":{asDataloader:[9,2,1,""]},"quapy.classification.svmperf":{SVMperf:[9,1,1,""]},"quapy.classification.svmperf.SVMperf":{decision_function:[9,2,1,""],fit:[9,2,1,""],predict:[9,2,1,""],set_params:[9,2,1,""],valid_losses:[9,4,1,""]},"quapy.data":{base:[10,0,0,"-"],datasets:[10,0,0,"-"],preprocessing:[10,0,0,"-"],reader:[10,0,0,"-"]},"quapy.data.base":{Dataset:[10,1,1,""],LabelledCollection:[10,1,1,""],isbinary:[10,5,1,""]},"quapy.data.base.Dataset":{SplitStratified:[10,2,1,""],binary:[10,3,1,""],classes_:[10,3,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],stats:[10,2,1,""],vocabulary_size:[10,3,1,""]},"quapy.data.base.LabelledCollection":{Xy:[10,3,1,""],artificial_sampling_generator:[10,2,1,""],artificial_sampling_index_generator:[10,2,1,""],binary:[10,3,1,""],counts:[10,2,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],natural_sampling_generator:[10,2,1,""],natural_sampling_index_generator:[10,2,1,""],prevalence:[10,2,1,""],sampling:[10,2,1,""],sampling_from_index:[10,2,1,""],sampling_index:[10,2,1,""],split_stratified:[10,2,1,""],stats:[10,2,1,""],uniform_sampling:[10,2,1,""],uniform_sampling_index:[10,2,1,""]},"quapy.data.datasets":{df_replace:[10,5,1,""],fetch_UCIDataset:[10,5,1,""],fetch_UCILabelledCollection:[10,5,1,""],fetch_reviews:[10,5,1,""],fetch_twitter:[10,5,1,""],warn:[10,5,1,""]},"quapy.data.preprocessing":{IndexTransformer:[10,1,1,""],index:[10,5,1,""],reduce_co
lumns:[10,5,1,""],standardize:[10,5,1,""],text2tfidf:[10,5,1,""]},"quapy.data.preprocessing.IndexTransformer":{add_word:[10,2,1,""],fit:[10,2,1,""],fit_transform:[10,2,1,""],index:[10,2,1,""],transform:[10,2,1,""],vocabulary_size:[10,2,1,""]},"quapy.data.reader":{binarize:[10,5,1,""],from_csv:[10,5,1,""],from_sparse:[10,5,1,""],from_text:[10,5,1,""],reindex_labels:[10,5,1,""]},"quapy.error":{absolute_error:[8,5,1,""],acc_error:[8,5,1,""],acce:[8,5,1,""],ae:[8,5,1,""],f1_error:[8,5,1,""],f1e:[8,5,1,""],from_name:[8,5,1,""],kld:[8,5,1,""],mae:[8,5,1,""],mean_absolute_error:[8,5,1,""],mean_relative_absolute_error:[8,5,1,""],mkld:[8,5,1,""],mnkld:[8,5,1,""],mrae:[8,5,1,""],mse:[8,5,1,""],nkld:[8,5,1,""],rae:[8,5,1,""],relative_absolute_error:[8,5,1,""],se:[8,5,1,""],smooth:[8,5,1,""]},"quapy.evaluation":{artificial_prevalence_prediction:[8,5,1,""],artificial_prevalence_protocol:[8,5,1,""],artificial_prevalence_report:[8,5,1,""],evaluate:[8,5,1,""],gen_prevalence_prediction:[8,5,1,""],natural_prevalence_prediction:[8,5,1,""],natural_prevalence_protocol:[8,5,1,""],natural_prevalence_report:[8,5,1,""]},"quapy.functional":{HellingerDistance:[8,5,1,""],adjusted_quantification:[8,5,1,""],artificial_prevalence_sampling:[8,5,1,""],get_nprevpoints_approximation:[8,5,1,""],normalize_prevalence:[8,5,1,""],num_prevalence_combinations:[8,5,1,""],prevalence_from_labels:[8,5,1,""],prevalence_from_probabilities:[8,5,1,""],prevalence_linspace:[8,5,1,""],strprev:[8,5,1,""],uniform_prevalence_sampling:[8,5,1,""],uniform_simplex_sampling:[8,5,1,""]},"quapy.method":{aggregative:[11,0,0,"-"],base:[11,0,0,"-"],meta:[11,0,0,"-"],neural:[11,0,0,"-"],non_aggregative:[11,0,0,"-"]},"quapy.method.aggregative":{ACC:[11,1,1,""],AdjustedClassifyAndCount:[11,4,1,""],AggregativeProbabilisticQuantifier:[11,1,1,""],AggregativeQuantifier:[11,1,1,""],CC:[11,1,1,""],ClassifyAndCount:[11,4,1,""],ELM:[11,1,1,""],EMQ:[11,1,1,""],ExpectationMaximizationQuantifier:[11,4,1,""],ExplicitLossMinimisation:[11,4,1,""],HDy:[11,1,1,""],HellingerDistanceY:[11,4,1,""],MAX:[11,1,1,""],MS2:[11,1,1,""],MS:[11,1,1,""],MedianSweep2:[11,4,1,""],MedianSweep:[11,4,1,""],OneVsAll:[11,1,1,""],PACC:[11,1,1,""],PCC:[11,1,1,""],ProbabilisticAdjustedClassifyAndCount:[11,4,1,""],ProbabilisticClassifyAndCount:[11,4,1,""],SVMAE:[11,1,1,""],SVMKLD:[11,1,1,""],SVMNKLD:[11,1,1,""],SVMQ:[11,1,1,""],SVMRAE:[11,1,1,""],T50:[11,1,1,""],ThresholdOptimization:[11,1,1,""],X:[11,1,1,""],training_helper:[11,5,1,""]},"quapy.method.aggregative.ACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""],solve_adjustment:[11,2,1,""]},"quapy.method.aggregative.AggregativeProbabilisticQuantifier":{posterior_probabilities:[11,2,1,""],predict_proba:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.AggregativeQuantifier":{aggregate:[11,2,1,""],aggregative:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],learner:[11,3,1,""],n_classes:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.CC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ELM":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.EMQ":{EM:[11,2,1,""],EPSILON:[11,4,1,""],MAX_ITER:[11,4,1,""],aggregate:[11,2,1,""],fit:[11,2,1,""],predict_proba:[11,2,1,""]},"quapy.method.aggregative.HDy":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.MS":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.MS2":{optimize_threshold
:[11,2,1,""]},"quapy.method.aggregative.OneVsAll":{aggregate:[11,2,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],posterior_probabilities:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.PACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.PCC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ThresholdOptimization":{aggregate:[11,2,1,""],compute_fpr:[11,2,1,""],compute_table:[11,2,1,""],compute_tpr:[11,2,1,""],fit:[11,2,1,""],optimize_threshold:[11,2,1,""]},"quapy.method.base":{BaseQuantifier:[11,1,1,""],BinaryQuantifier:[11,1,1,""],isaggregative:[11,5,1,""],isbinary:[11,5,1,""],isprobabilistic:[11,5,1,""]},"quapy.method.base.BaseQuantifier":{aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.base.BinaryQuantifier":{binary:[11,3,1,""]},"quapy.method.meta":{EACC:[11,5,1,""],ECC:[11,5,1,""],EEMQ:[11,5,1,""],EHDy:[11,5,1,""],EPACC:[11,5,1,""],Ensemble:[11,1,1,""],ensembleFactory:[11,5,1,""],get_probability_distribution:[11,5,1,""]},"quapy.method.meta.Ensemble":{VALID_POLICIES:[11,4,1,""],accuracy_policy:[11,2,1,""],aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],ds_policy:[11,2,1,""],ds_policy_get_posteriors:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],ptr_policy:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""],sout:[11,2,1,""]},"quapy.method.neural":{QuaNetModule:[11,1,1,""],QuaNetTrainer:[11,1,1,""],mae_loss:[11,5,1,""]},"quapy.method.neural.QuaNetModule":{device:[11,3,1,""],forward:[11,2,1,""],init_hidden:[11,2,1,""]},"quapy.method.neural.QuaNetTrainer":{classes_:[11,3,1,""],clean_checkpoint:[11,2,1,""],clean_checkpoint_dir:[11,2,1,""],epoch:[11,2,1,""],fit:[11,2,1,""],get_aggregative_estims:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.non_aggregative":{MaximumLikelihoodPrevalenceEstimation:[11,1,1,""]},"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation":{classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.model_selection":{GridSearchQ:[8,1,1,""]},"quapy.model_selection.GridSearchQ":{best_model:[8,2,1,""],classes_:[8,3,1,""],fit:[8,2,1,""],get_params:[8,2,1,""],quantify:[8,2,1,""],set_params:[8,2,1,""]},"quapy.plot":{binary_bias_bins:[8,5,1,""],binary_bias_global:[8,5,1,""],binary_diagonal:[8,5,1,""],error_by_drift:[8,5,1,""],save_or_show:[8,5,1,""]},"quapy.util":{EarlyStop:[8,1,1,""],create_if_not_exist:[8,5,1,""],create_parent_dir:[8,5,1,""],download_file:[8,5,1,""],download_file_if_not_exists:[8,5,1,""],get_quapy_home:[8,5,1,""],map_parallel:[8,5,1,""],parallel:[8,5,1,""],pickled_resource:[8,5,1,""],save_text_file:[8,5,1,""],temp_seed:[8,5,1,""]},quapy:{classification:[9,0,0,"-"],data:[10,0,0,"-"],error:[8,0,0,"-"],evaluation:[8,0,0,"-"],functional:[8,0,0,"-"],isbinary:[8,5,1,""],method:[11,0,0,"-"],model_selection:[8,0,0,"-"],plot:[8,0,0,"-"],util:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","property","Python property"],"4":["py","attribute","Python attribute"],"5":["py","function","Python 
function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:property","4":"py:attribute","5":"py:function"},terms:{"0":[0,1,3,4,5,8,9,10,11],"00":[0,1,4,8],"000":1,"0001":[4,11],"000e":1,"001":[4,9,11],"009":1,"01":[8,9,11],"017":1,"018":0,"02":1,"021":0,"02552":4,"03":1,"034":1,"035":1,"037":1,"04":1,"041":1,"042":1,"046":1,"048":1,"05":[5,8],"055":1,"063":0,"065":0,"070":1,"073":1,"075":1,"078":0,"081":0,"082":[0,1],"083":0,"086":0,"091":1,"099":0,"1":[0,1,3,4,5,8,9,10,11],"10":[0,1,4,5,8,9,11],"100":[0,1,3,4,5,9,10,11],"1000":[0,4,11],"10000":4,"100000":4,"101":[4,10],"1010":4,"1024":11,"104":0,"108":1,"109":0,"11":[0,1,6],"11338":0,"114":1,"1145":[],"12":9,"120":0,"1215742":0,"1271":0,"13":[0,9],"139":0,"14":[3,11],"142":1,"146":[3,11],"1473":0,"148":0,"1484":0,"15":[3,8,11],"150":0,"153":0,"157":0,"158":0,"159":0,"1593":0,"1594":0,"1599":0,"161":0,"163":[0,1],"164":[0,3,11],"167":0,"17":0,"1771":1,"1775":[0,3],"1778":[0,3],"178":0,"1823":0,"1839":0,"18399":0,"1853":0,"19":[3,10,11],"193":0,"199151":0,"19982":4,"1st":0,"2":[0,1,3,5,8,10,11],"20":[5,8,11],"200":[1,9],"2000":0,"2002":[3,11],"2011":4,"2013":[3,11],"2015":[0,2,3,11],"2016":[3,10,11],"2017":[0,3,11],"2018":[0,3,10],"2019":[3,11],"2020":4,"20342":4,"206":0,"207":0,"208":0,"21":[1,3,5,8,11],"210":8,"211":0,"2126":0,"2155":0,"21591":0,"218":[3,11],"2184":0,"219e":1,"22":[0,3,9,10,11],"222":0,"222046":0,"226":0,"229":1,"229399":0,"23":9,"235":1,"238":0,"2390":0,"24":[0,9],"243":0,"248563":0,"24866":4,"24987":4,"25":[0,5,8,9,11],"25000":0,"256":[0,9],"26":9,"261":0,"265":0,"266":0,"267":0,"27":[1,3,9,11],"270":0,"2700406":[],"271":0,"272":0,"274":0,"275":1,"27th":[0,3,10],"28":3,"280":0,"281":0,"282":0,"283":[0,1],"288":0,"289":0,"2971":0,"2nd":0,"2t":1,"2x5fcv":0,"3":[0,1,3,5,6,8,9,10,11],"30":[0,1,3,11],"300":[0,1,9],"305":0,"306":0,"312":0,"32":[0,6],"33":[0,5],"331":0,"333":0,"335":0,"337":0,"34":[0,3,11],"341":0,"346":1,"347":0,"350":0,"351":0,"357":1,"359":0,"361":0,"366":1,"372":0,"373":0,"376132":0,"3765":0,"3813":0,"3821":0,"383e":1,"387e":1,"392":0,"394":0,"399":0,"3f":[1,6],"3rd":0,"4":[0,1,3,4,5,8,11],"40":[0,3,4,11],"404333":0,"407":0,"41":[3,11],"412":0,"412e":1,"413":0,"414":0,"417":0,"41734":4,"42":[1,8],"421":0,"4259":0,"426e":1,"427":0,"430":0,"434":0,"435":1,"43676":4,"437":0,"44":0,"446":0,"45":[3,5,11],"452":0,"459":1,"4601":0,"461":0,"463":0,"465":0,"466":0,"470":0,"48":[3,11],"481":0,"48135":4,"486":0,"4898":0,"492":0,"496":0,"4960":1,"497":0,"5":[0,1,3,4,5,8,9,10,11],"50":[0,5,8,11],"500":[0,1,4,5,11],"5000":[1,5],"5005":4,"507":0,"508":0,"512":[9,11],"514":0,"515e":1,"530":0,"534":0,"535":0,"535e":1,"5379":4,"539":0,"541":1,"546":0,"5473":0,"54it":4,"55":5,"55it":4,"565":1,"569":0,"57":0,"573":0,"578":1,"583":0,"591":[3,11],"5f":4,"5fcv":11,"6":[0,1,3,5,8,10,11],"60":0,"600":1,"601":0,"604":[3,11],"606":0,"625":0,"627":0,"633e":1,"634":1,"64":[9,11],"640":0,"641":0,"650":0,"653":0,"654":1,"66":[1,11],"665":0,"667":0,"669":0,"67":5,"683":0,"688":0,"691":0,"694582":0,"7":[1,5,9],"70":0,"700":0,"701e":1,"711":0,"717":1,"725":1,"730":0,"735":0,"740e":1,"748":0,"75":[0,5,8],"762":0,"774":0,"778":0,"787":0,"794":0,"798":0,"8":[0,1,5,10,11],"8000":0,"830":0,"837":1,"858":1,"861":0,"87":[0,3,11],"8788":0,"889504":0,"8d2fhsgcvn0aaaaa":[],"9":[0,1,3,5,11],"90":[5,8],"901":0,"909":1,"914":1,"917":0,"919":0,"922":0,"923":0,"935":0,"936":0,"937":0,"945":1,"95":8,"9533":0,"958":0,"97":0,"979":0,"982":0,"99":8,"abstract":[3,9,11],"case":[0,1,3,4,5,8,11],"class":[0,1,3,4,5,6,8,9,10,11],"d\u00edez":[3,11]
,"default":[1,3,8,9,10],"do":[0,1,3,4,8],"final":[1,3,5],"float":[0,3,8,10,11],"function":[0,1,3,4,5,6,7,9,11],"g\u00e1llego":[0,3,11],"gonz\u00e1lez":[3,11],"import":[0,1,3,4,5,6],"int":[0,5,8,10,11],"long":4,"new":[0,3,10,11],"p\u00e9rez":[0,3,11],"return":[0,1,3,4,5,8,9,10,11],"rodr\u0131":[3,11],"static":[3,11],"true":[0,1,3,4,5,6,8,9,10,11],"try":4,"while":[3,5,8,9,11],A:[0,3,8,10,11],As:[3,4],By:[1,3,8],For:[0,1,5,6,8,11],If:[3,5,8,9,11],In:[0,1,2,3,4,5,6,11],It:[3,4,5],One:[0,1,3,11],That:[1,4],The:[0,1,2,4,5,6,8,9,10,11],Then:3,These:0,To:[5,10],_:5,__:9,__class__:5,__name__:5,_adjust:[],_ae_:[],_classify_:11,_error_name_:11,_fit_learner_:11,_kld_:[],_labelledcollection_:11,_learner_:11,_mean:[],_min_df_:10,_nkld_:[],_posterior_probabilities_:11,_q_:[],_rae_:[],_svmperf_:[],ab:[],aboud:3,about:[0,5],abov:[0,3,5],absolut:[1,3,5,6],absolute_error:8,abstractmethod:3,acc:[1,3,5,6,8,11],acc_error:8,accept:3,access:[0,3],accommod:0,accord:[1,3,4,8],accordingli:5,accuraci:[1,5],accuracy_polici:11,achiev:[1,3,4,5],acm:[0,3,10,11],across:[0,1,4,5,6],action:[0,11],acut:0,ad:6,add:[3,4,8],add_word:10,addit:3,addition:[0,11],adjust:[3,6,11],adjusted_quantif:8,adjustedclassifyandcount:11,adopt:[3,4],advanc:[0,6],advantag:3,ae:[1,2,5,8],ae_:1,affect:8,afterward:[9,11],again:5,against:5,aggreg:[1,4,5,6,7,8],aggregativeprobabilisticquantifi:[3,11],aggregativequantifi:[3,11],aggregg:11,aim:[4,5],al:[0,2],alaiz:[3,11],alegr:[3,11],alejandro:4,alia:[3,11],all:[0,1,2,3,5,8,9,11],allia:3,allow:[0,1,2,3,5,8,10,11],almost:3,along:[0,3,11],alreadi:[3,11],also:[0,1,2,3,5,6,9],altern:4,although:[3,4,5,9,11],alwai:[3,4,5],among:3,an:[0,1,2,3,4,5,6,8,9,11],analys:[5,6],analysi:[0,3,6,9,10,11],analyz:5,ani:[0,1,3,4,5,6,8,10,11],anoth:[0,1,3,5],anyon:0,api:6,app:8,appeal:1,appear:5,append:5,appli:[2,3,4,5,8,10],appropri:4,approxim:[1,5,11],ar:[0,1,3,4,5,8,9,10,11],archive_filenam:8,archive_path:8,arg:[8,10,11],args_i:8,argu:4,argument:[0,1,3,5],arifici:8,aris:1,around:1,arrai:[1,3,5,8,10],articl:[3,4,11],artifici:[0,1,3,4,5,6,8],artificial_prevalence_predict:8,artificial_prevalence_protocol:8,artificial_prevalence_report:8,artificial_prevalence_sampl:8,artificial_sampling_ev:[1,4],artificial_sampling_gener:[0,10],artificial_sampling_index_gener:10,artificial_sampling_predict:[1,5],artificial_sampling_report:1,arxiv:4,asarrai:1,asdataload:9,asonam:0,assess:4,assign:[3,8],associ:10,assum:[1,6,11],assumpt:[1,5,6],astyp:10,attempt:3,attribut:11,august:0,autom:[0,3,6],automat:[0,1],av:[3,11],avail:[0,1,2,3,5,6],averag:[1,3],avoid:1,axi:5,b:[0,10],balanc:[0,4],band:5,bar:8,barranquero:[2,3,11],base:[0,3,6,7,8,9],base_classifi:5,base_estim:3,base_quantifier_class:11,baseestim:[9,11],baselin:6,basequantifi:[3,8,11],basic:[5,11],batch_siz:9,batch_size_test:9,been:[0,3,4,5,10,11],befor:[3,11],behav:[3,5],being:[4,8],belief:1,belong:3,below:[0,2,3,5,10],best:[4,8,11],best_model:8,best_model_:4,best_params_:4,better:4,between:[4,5,6],beyond:5,bia:6,bias:5,bidirect:11,bin:[5,11],bin_bia:5,bin_diag:5,binar:[8,10],binari:[3,5,6,10,11],binary_bias_bin:[5,8],binary_bias_glob:[5,8],binary_diagon:[5,8],binary_quantifi:11,binaryquantifi:11,block:0,bool:[8,9,11],both:5,bound:8,box:5,breast:0,brief:1,broken:5,budg:1,budget:[1,4],build:11,bypass:11,c:[3,4,9,10,11],calibr:3,calibratedclassifi:3,calibratedclassifiercv:3,calibratedcv:11,call:[0,1,5,8,9,11],callabl:[0,8,10],can:[0,1,2,3,4,5,8],cancer:0,cannot:11,cardiotocographi:0,care:[9,11],carri:3,casa_token:[],castano:[3,11],castro:[3,11],categor:3,categori:1,cc:[3,5,11],ceil:8,center
:5,chang:[0,1,3,11],character:[3,6],characteriz:[0,3,11],charg:[0,8],check:[3,4],checkpoint:[9,11],checkpointdir:11,checkpointnam:11,checkpointpath:9,choic:4,chosen:[4,8],cl:0,class2int:10,class_weight:4,classes_:[8,10,11],classif:[0,1,3,7,8,10,11],classif_posterior:[3,11],classif_predict:[3,11],classif_predictions_bin:11,classifi:[1,4,5,6,11],classifier_net:9,classifiermixin:9,classifyandcount:[3,11],classmethod:[0,10,11],classnam:10,clean_checkpoint:11,clean_checkpoint_dir:11,clear:5,clearer:1,clearli:5,clip:8,close:1,closer:1,cmc:0,cnn:3,cnnnet:[3,9],code:[0,3,4,5],coincid:[0,6],col:[0,10],collect:[0,8,10],collet:10,color:[5,8],colormap:8,column:[0,10],com:[],combin:[0,1,4,8,9],combinatio:8,combinations_budget:8,come:0,commandlin:[],common:11,commonli:6,compar:[5,11],comparison:5,compil:[2,3],complet:[3,5],compon:9,compress:0,comput:[1,3,5,8,9,11],computation:4,compute_fpr:11,compute_t:11,compute_tpr:11,concept:6,concur:11,conduct:0,confer:[0,3,10],configur:[4,8],consid:[3,5,10],consist:[0,4,5,10],constrain:[1,5],constructor:3,consult:[0,1],contain:[1,2,3,5,8,9,10,11],contanin:8,content:7,context:8,contrast:1,control:[1,4],conv_block:9,conv_lay:9,convert:[1,3],copi:10,cornel:[],correct:11,correspond:[5,10],cost:1,costli:4,could:[0,1,3,4,5,6,11],count:[4,5,6,10,11],count_:[],counter:10,countvector:10,covari:10,cover:[1,4],coz:[0,3,11],cpu:[1,9],creat:[0,6,8],create_if_not_exist:8,create_parent_dir:8,crisp:3,criteria:4,cross:[3,11],cs:[],csr_matrix:10,csv:10,ctg:0,cuda:[3,11],cumbersom:1,curios:5,current:[3,8,10],custom:[3,6,8],customarili:[3,4],cv:[3,4],cyan:5,dat:[0,9],data:[1,3,4,5,6,7,8,9,11],data_hom:10,datafram:1,dataset:[1,3,4,5,6,7,8,9,11],dataset_nam:10,deal:0,decaesteck:[3,11],decim:1,decis:3,decision_funct:9,dedic:1,deep:[3,8,9,11],def:[0,1,3,5,8],defin:[0,3,8,9,11],degre:4,del:[0,3,11],delai:8,deliv:3,dens:0,depend:[0,1,4,5,8],describ:[3,11],descript:0,design:4,desir:[0,1],despit:1,detail:[0,1,3,6,11],determin:[1,4,5],detriment:5,devel:10,develop:[4,6],deviat:[0,1,5],devic:[0,3,5,9,11],df:[1,10],df_replac:10,diabet:0,diagon:6,dict:[8,9,10,11],dictionari:8,differ:[0,1,3,4,5,6,8,10],difficult:5,digit:0,dimens:[8,9,10],dimension:[8,10],directli:[0,1,3],directori:[2,10],discoveri:[3,11],discuss:5,displai:[1,5],distanc:11,distant:[1,8],distribut:[0,3,5,8,11],diverg:[1,3],dl:[],doabl:0,doc_embed:11,doc_embedding_s:11,doc_posterior:11,document:[0,1,3,5,10,11],document_embed:9,doe:[0,2,3,8],doi:[],done:3,dot:5,down:5,download:[0,2,3],download_fil:8,download_file_if_not_exist:8,drawn:[0,1,4],drift:6,drop:11,drop_p:9,ds:[3,11],ds_polici:11,ds_policy_get_posterior:11,dtype:1,dump:10,dure:[1,5],dynam:[3,11],e:[0,1,3,4,5,6,8,10,11],eacc:11,each:[0,1,3,4,5,8,9,10,11],early_stop:11,earlystop:8,easili:[0,2,5],ecc:11,edu:[],eemq:11,effect:3,effici:3,ehdi:11,either:[1,3,8,11],element:3,elm:[3,11],em:11,embed:[3,9],embedding_s:9,empti:10,emq:[5,11],encod:10,end:[4,8],endeavour:6,enough:5,ensembl:[0,6,11],ensemblefactori:11,ensure_probabilist:11,entir:[0,3,4,5],environ:[1,3,4,5,8],ep:[1,8],epacc:11,epoch:[9,11],epsilon:[1,11],equal:[1,8],equidist:[0,8],equip:[3,5],err:8,err_drift:5,err_nam:8,error:[3,4,6,7],error_:[],error_by_drift:[5,8],error_funct:1,error_metr:[1,4,8],error_nam:[5,8,11],establish:8,estim:[1,3,5,6,8,9,11],estim_prev:[1,5,8],estim_preval:[3,6],esuli:[0,2,3,10,11],et:[0,2],etc:6,eval_budget:[4,8],evalu:[0,3,4,5,6,7],everi:[3,9,11],everyth:3,evinc:5,ex:[],exact:0,exactli:0,exampl:[0,1,3,4,5,8,9,11],exce:8,excel:0,except:[3,8],exemplifi:0,exhibit:[4,5],exist:8,expand_frame_repr:1,exp
ect:6,expectationmaximizationquantifi:[3,11],experi:[1,2,3,4,5,8],explain:[1,5],explicitlossminim:11,explicitlossminimis:11,explor:[4,8],express:10,ext:2,extend:[2,3,11],extens:[0,2,5],extern:3,extract:[1,8],f1:[1,9],f1_error:8,f1e:[1,8],f:[0,1,3,4,5,6,10,11],fabrizio:4,facilit:6,fact:[3,5],fals:[1,3,5,8,9,10,11],famili:3,familiar:3,fast:8,faster:[0,10],feat1:10,feat2:10,featn:10,featur:0,feature_extract:10,fetch:[0,6],fetch_review:[0,1,3,4,5,10],fetch_twitt:[0,3,6,10],fetch_ucidataset:[0,3,10],fetch_ucilabelledcollect:[0,10],ff_layer:11,fhe:0,file:[0,5,10],fin:0,find:[0,4],finish:4,first:[0,1,2,3,5,8,10,11],fit:[1,3,4,5,6,8,9,10,11],fit_learn:[3,11],fit_transform:10,fix:[1,4],float64:1,fold:[3,11],folder:0,follow:[0,1,3,4,5,6],fomart:10,for_model_select:[0,10],form:[0,9],format:[0,5,10],former:[2,9,11],forward:[9,11],found:[0,3,4],four:3,fp:11,fpr:8,framework:6,frequenc:0,from:[0,1,3,4,5,6,8,10,11],from_csv:10,from_nam:[1,8],from_spars:10,from_text:10,full:1,fulli:0,func:8,further:[0,1,3],fusion:[0,3,11],futur:3,g:[0,1,3,4,6,8,10,11],gao:[0,3,10,11],gasp:[0,10],gen:8,gen_data:5,gen_fn:8,gen_prevalence_predict:8,gener:[0,1,3,4,5,8,9,10,11],generation_func:8,german:0,get:[0,1,5,8,9],get_aggregative_estim:11,get_nprevpoints_approxim:[1,8],get_param:[3,8,9,11],get_probability_distribut:11,get_quapy_hom:8,github:[],given:[1,3,4,11],goe:4,good:[4,5],got:4,govern:1,grant:11,grid:[4,8,11],gridsearchcv:4,gridsearchq:[4,8],group:3,guarante:11,guez:[3,11],gzip:0,ha:[3,4,5],haberman:[0,3],handl:0,happen:[4,5],hard:3,harder:5,harri:0,have:[0,1,2,3,4,5,9,10,11],hcr:[0,3,10],hdy:[6,11],held:[3,4],helling:11,hellingerdist:8,hellingerdistancei:[3,11],help:5,here:1,hidden:5,hidden_s:9,hide:5,high:5,higher:[1,5],hlt:[],hold:6,home:10,hook:[9,11],how:[0,1,3,4,5,11],howev:[0,4,5,11],hp:[0,3,4,10],html:[],http:[],hyper:[4,8],hyperparam:4,hyperparamet:[3,8,11],i:[0,1,3,4,5,8,10,11],id:[0,3,10],idf:0,ieee:0,ignor:[8,9,10,11],iid:[1,5,6],illustr:[3,4,5],imdb:[0,5,10],implement:[0,1,3,4,5,6,11],impos:4,improv:3,includ:[0,1,3,5,6],inde:[3,4],index:[0,3,6,10],indextransform:10,indic:[0,1,3,4,5,8,10,11],individu:[1,3],infer:0,inform:[0,1,3,4,8,10,11],infrequ:10,inherit:3,init:3,init_hidden:[9,11],initi:0,inplac:[1,3,10],input:[3,5,8,9],insight:5,inspir:3,instal:[0,3,6],instanc:[0,3,4,5,6,8,9,10,11],instanti:[0,1,3,4],instead:[1,3,4,9,11],integ:[3,10],integr:6,interest:[1,5,6],interestingli:5,interfac:[0,1],intern:[0,3,10],interpret:[5,6],interv:[1,5,8],introduc:1,invok:[0,1,3,8,10],involv:[2,5],ionospher:0,iri:0,irrespect:5,isaggreg:11,isbinari:[8,10,11],isometr:5,isprobabilist:11,isti:[],item:8,iter:[0,8,11],its:[3,4],itself:[3,11],j:[0,3,11],joachim:3,job:[2,8],joblib:2,just:[1,3],k:[3,6,11],kei:8,kept:10,kernel_height:9,kfcv:[0,10,11],kindl:[0,1,3,5,10],kld:[1,2,8,9],know:3,knowledg:[0,3,10,11],known:[0,3,4],kullback:[1,3],kwarg:[9,10,11],l1:11,label:[0,3,4,5,6,8,9,10,11],labelledcollect:[0,3,4,8,10,11],larg:4,largest:8,last:[1,3,5],lastli:3,latex:5,latinn:[3,11],latter:[9,11],layer:3,lead:1,learn:[1,2,3,4,6,8,11],learner:[3,4,11],least:[0,10],leav:10,legend:8,leibler:[1,3],less:[8,10],let:[1,3],level:11,leverag:3,like:[0,1,3,5],limit:[5,8],line:[1,3],linear:5,linear_model:[1,3,4,6],linearsvc:[3,5],linspac:5,list:[0,5,8,10],listedcolormap:8,literatur:[0,1,4,6],load:[0,3,8,10],loader:0,loader_func:[0,10],local:8,log:10,logist:[1,3,9,11],logisticregress:[1,3,4,6],logscal:8,logspac:4,longer:8,look:[0,1,3,5],loss:[6,9,11],low:5,lower:[5,8],lower_is_bett:8,lowest:5,lr:[1,3,9,11],lstm:3,lstm_class_nlay:9,lstm_hidden_s:11
,lstm_nlayer:11,lstmnet:9,m:[3,8,11],machin:[1,4,6],made:[0,2,11],mae:[1,4,6,8,9,11],mae_loss:11,main:5,maintain:[3,11],make:[0,1,3],mammograph:0,manag:[0,3,10],mani:[1,3,4,5,6,11],manner:0,manual:0,map:[1,9],map_parallel:8,matplotlib:[2,8],matric:[0,5,10],matrix:5,max:11,max_it:11,max_sample_s:11,maxim:6,maximum:[1,8],maximumlikelihoodprevalenceestim:11,md:[],mean:[0,1,3,4,5,6,10,11],mean_absolute_error:8,mean_relative_absolute_error:8,measur:[2,3,4,5,6,11],mediansweep2:11,mediansweep:11,member:3,mention:3,merg:5,meta:[6,7,8],method:[0,1,4,5,6,7,8],method_data:5,method_nam:[5,8],metric:[1,3,4,6,8],might:1,min_df:[1,3,4,5,10],min_po:11,mine:[0,3,11],minim:8,minimum:10,minimun:10,mining6:10,mixtur:3,mkld:[1,8,11],mnkld:[1,8,11],modal:4,model:[0,1,5,6,8,11],model_select:[4,7],modifi:[3,8],modul:[0,1,3,5,6,7],moment:[0,3],more:[3,5,8],moreo:[0,3,4,10],most:[0,3,5,6,11],movi:0,mrae:[1,6,8,9,11],ms2:11,ms:11,mse:[1,3,6,8,11],msg:11,multiprocess:8,multivari:[3,11],must:3,my:[],my_arrai:8,my_custom_load:0,my_data:0,mycustomloss:3,n:[0,1,8],n_bin:[5,8],n_class:[1,3,8,9,10,11],n_compon:9,n_epoch:11,n_job:[1,3,4,8,10,11],n_preval:[0,8,10],n_prevpoint:[1,4,5,8],n_repeat:[1,8],n_repetit:[1,4,5,8],name:[5,8,9,10],nativ:6,natur:[1,8],natural_prevalence_predict:8,natural_prevalence_protocol:8,natural_prevalence_report:8,natural_sampling_gener:10,natural_sampling_index_gener:10,nbin:[5,8],ndarrai:[1,3,8,10,11],necessarili:11,need:[0,3,9,11],neg:[0,5],nest:9,net:9,network:[0,10,11],neural:[0,7,8,10],neuralclassifiertrain:[3,9],neutral:0,next:[4,8],nfold:[0,10],nkld:[1,2,6,8,9],nn:[9,11],nogap:10,non:[3,11],non_aggreg:[7,8],none:[1,4,8,9,10,11],nonetheless:4,nor:3,normal:[0,1,3,11],normalize_preval:8,note:[1,3,4,5],now:5,nowadai:3,np:[1,3,4,5,8],npp:8,nprevpoint:8,nrepeat:[0,10],num_prevalence_combin:[1,8],number:[0,1,3,5,8,10,11],numer:[0,1,3,6,10],numpi:[2,4,8,11],o_l6x_pcf09mdetq4tu7jk98mxfbgsxp9zso14jkuiyudgfg0:[],object:[0,8,9,10,11],observ:1,obtain:[1,4],occur:[5,10],occurr:10,octob:[0,3],offer:[3,6],older:2,omd:[0,10],ommit:1,onc:[1,3,5,8],one:[0,1,3,4,5,8,9,11],ones:[1,3,5,8,10],onevsal:[3,11],onli:[0,3,5,8,11],open:[0,6],oper:3,opt:4,optim:[2,3,4,8,11],optimize_threshold:11,option:[0,1,3,5,8,10,11],order:[0,2,3,5,8,10,11],order_bi:11,org:[],orient:[3,6,8,11],origin:[0,3,10,11],os:0,other:[1,3,5,6,8],otherwis:[0,3,11],our:[],out:[3,4,5],outcom:5,outer:8,output:[0,1,3,4,11],over:[3,4],overal:1,overestim:5,overrid:3,overridden:[3,9,11],own:4,p:[0,3,8,11],p_hat:8,pacc:[1,3,5,11],packag:[0,2,3,6,7],pad:9,pad_length:9,padding_length:9,page:[0,2,6],pageblock:0,pair:0,panda:[1,2],paper:[0,3,11],parallel:[1,3,8],param:[4,8,9,10,11],param_grid:[4,8,11],param_mod_sel:11,param_model_sel:11,paramet:[1,3,4,8,9,10,11],part:[3,10],particular:[0,1,3],particularli:1,pass:[0,1,5,9,11],past:1,patch:[2,3],path:[0,3,5,8,10],patienc:[8,9,11],pattern:[3,11],pca:9,pcalr:9,pcc:[3,4,5,11],pd:1,pdf:5,peopl:[],perf:6,perform:[1,3,4,5,6,8,9,11],phonem:0,pick:4,pickl:[3,8,10],pickle_path:8,pickled_resourc:8,pii:[],pip:2,pipelin:9,pkl:8,plai:0,plan:3,pleas:3,plot:[6,7],png:5,point:[0,1,3,8],polici:[3,11],popular:6,portion:4,pos_class:[8,10],posit:[0,3,5],possibl:[1,3,8,9],posterior:[3,8,11],posterior_prob:[3,11],postpon:3,potter:0,pp:[0,3],practic:[0,4],pre:[0,3],prec:[0,8],precis:[0,1],preclassifi:3,predict:[3,4,5,8,9,11],predict_proba:[3,9,11],predictor:1,prepare_svmperf:[2,3],preprint:4,preprocess:[0,1,3,7,8],present:[0,3,10],preserv:[1,5],pretti:5,prev:[0,1,8,10],prevail:3,preval:[0,1,3,4,5,6,8,10,11],prevalence_estim:8,preval
ence_from_label:8,prevalence_from_prob:8,prevalence_linspac:8,prevel:11,previou:3,previous:11,prevs_estim:11,prevs_hat:[1,8],princip:9,print:[0,1,3,4,6],prior:[1,3,4,5,6],priori:[3,11],probabilist:[3,11],probabilisticadjustedclassifyandcount:11,probabilisticclassifyandcount:11,probabl:[1,3,4,5,6,11],problem:[0,3,5,11],procedur:[3,6,11],proceed:[0,3,10],process:[3,4,8],processor:3,procol:1,produc:[0,1,5,8],product:3,progress:8,properli:0,properti:[3,8,9,10,11],proport:[3,4,8,11],propos:[2,3,11],protocl:8,protocol:[0,3,4,5,6,8],provid:[0,3,5,6],ptecondestim:11,ptr:[3,11],ptr_polici:11,purpos:[0,11],python:[0,6],pytorch:2,q:[0,2,3,8,9],qacc:9,qdrop_p:11,qf1:9,qgm:9,qp:[0,1,3,4,5,6,8],quanet:[2,6,9,11],quanetmodul:11,quanettrain:11,quantif:[0,1,6,8,10,11],quantifi:[3,4,5,6,8,11],quantification_error:8,quantiti:8,quapi:[0,1,2,3,4,5],quapy_data:0,quay_data:10,quevedo:[0,3,11],quick:[],r:[0,3,11],rae:[1,2,8],rais:[3,8],rand:8,random:[1,3,4,5,8],random_se:[1,8],random_st:10,randomli:0,rang:[0,5],rank:3,rare:10,rate:3,rather:[1,4],raw:10,rb:0,re:[3,4,10],read:10,reader:[7,8],readm:[],real:10,reason:[3,5,6],receiv:[0,3,5],recip:[9,11],recognit:[3,11],recommend:[1,5],recurr:[0,3,10],red:0,red_siz:[3,11],reduc:[0,10],reduce_column:[0,10],refer:10,refit:[4,8],regard:4,regist:[9,11],regress:9,regressor:[1,3,11],reindex_label:10,rel:[1,3],relative_absolute_error:8,reli:[1,3],reliabl:[3,11],rememb:5,remov:10,repeat:[8,10],repetit:8,repl:10,replac:[0,3,10],replic:[1,4,8],report:1,repositori:0,repr_siz:9,repres:[1,3,5,10,11],represent:[0,3],request:[0,8,11],requir:[0,1,3,6,9],reset_net_param:9,resourc:8,respect:[0,1,5,11],respond:3,rest:[10,11],result:[1,2,3,4,5,6,11],retain:[0,3],retrain:4,return_constrained_dim:8,reus:[0,3,8],review:[5,6,10],reviews_sentiment_dataset:0,rewrit:5,right:4,role:0,root:6,roughli:0,routin:8,row:10,run:[0,1,2,3,4,5,8,9,11],s003132031400291x:[],s:[0,1,3,4,5,8,9,10],saeren:[3,11],sai:11,said:3,same:[0,3,5,10],sampl:[0,1,3,4,5,6,8,10,11],sample_s:[0,1,3,4,5,8,10,11],sampling_from_index:[0,10],sampling_index:[0,10],sander:[0,10],save:[5,8],save_or_show:8,save_text_fil:8,savepath:[5,8],scall:10,scenario:[1,3,4,5,6],scienc:[3,11],sciencedirect:[],scikit:[2,3,4],scipi:[2,10],score:[0,1,4,10],script:[1,2,3,6],se:[1,8],search:[3,4,6,8,11],sebastiani:[0,3,4,10,11],second:[0,1,3,5,8],section:4,see:[0,1,2,3,4,5,6],seed:[1,4,8],seem:3,seemingli:5,seen:5,select:[0,3,6,8,11],selector:3,self:[3,9,10,11],semeion:0,semev:0,semeval13:[0,10],semeval14:[0,10],semeval15:[0,10],semeval16:[0,6,10],sentenc:10,sentiment:[3,6,10,11],separ:[8,10],seri:0,serv:3,set:[0,1,3,4,5,6,8,9,10,11],set_opt:1,set_param:[3,8,9,11],set_siz:9,sever:0,sh:[2,3],shape:[5,8],share:[0,10],shift:[1,4,6,8],shoud:3,should:[0,1,3,4,5,6,9,10,11],show:[0,1,3,4,5,8,10],show_std:[5,8],showcas:5,shown:[1,5],shuffl:[9,10],signific:1,silent:[8,9,11],similar:11,simpl:[0,3,5,9,11],simplest:3,simplex:[0,8],simpli:[1,2,3,4,5,6,9,11],sinc:[0,1,3,5,8,9,11],singl:[1,3,6,11],size:[0,1,3,8,10,11],sklearn:[1,3,4,5,6,9,10,11],sld:3,slice:8,smooth:[1,8],smooth_limits_epsilon:8,so:[0,1,3,5,8,9,11],social:[0,3,10,11],soft:3,softwar:0,solid:5,solv:4,solve_adjust:11,some:[0,1,3,5],some_arrai:8,sometim:1,sonar:0,sourc:[2,3,6],sout:11,space:[0,4],spambas:0,spars:[0,10],special:[0,5,10],specif:[3,4],specifi:[0,1,3,5,8,10,11],spectf:0,spectrum:[0,1,4,5],speed:3,split:[0,3,4,5,10,11],split_stratifi:10,splitstratifi:10,spmatrix:10,squar:[1,3],sst:[0,10],stabil:1,standard:[0,1,5,10],start:4,stat:10,state:8,statist:[0,1,11],stats_siz:11,stdout:8,step:[5,8],stor
e:[0,10],str:[0,8,10],strategi:[3,4],stratifi:[0,3],stride:9,string:[1,8,10],strongli:[4,5],strprev:[0,1,8],structur:3,studi:[0,3,11],subclass:[9,11],subinterv:5,sublinear_tf:10,submit:0,submodul:7,subobject:9,suboptim:4,subpackag:7,subsequ:[10,11],subtract:[0,8],subtyp:10,suffic:5,suffici:11,sum:11,summar:0,supervis:[4,6],support:[3,6],surpass:1,svm:[3,5,6],svm_light:[],svm_perf:[],svm_perf_quantif:[2,3],svmae:[3,11],svmkld:[3,11],svmnkld:[3,11],svmperf:[2,3,7,8],svmperf_bas:[9,11],svmperf_hom:3,svmq:[3,11],svmrae:[3,11],syntax:5,system:4,t50:11,t:[0,1,3],take:[0,3,5,8,9,11],taken:3,target:[3,5,6,8,11],task:[3,4,11],temp_se:8,tempor:8,tend:5,tendenc:5,term:[0,1,3,4,5,6,10,11],test:[0,1,3,4,5,6,8,10,11],test_bas:[],test_dataset:[],test_method:[],test_path:[0,10],test_sampl:8,test_split:10,text2tfidf:[0,1,3,10],text:[0,3,8,10,11],textclassifiernet:9,textual:[0,6,10],tf:[0,10],tfidf:[0,4,5,10],tfidfvector:10,than:[1,4,5,8,10],thei:[0,3],them:[0,3,9,11],theoret:4,thereaft:1,thi:[0,1,2,3,4,5,6,8,9,11],thing:3,third:[1,5],those:[1,3,4,5,8,9],though:3,three:[0,5],thresholdoptim:11,through:[3,8],thu:[3,4,5,11],tictacto:0,time:[0,1,3,8,10],timeout:8,timeouterror:8,timer:8,titl:8,tj:[],tn:11,token:[0,10],tool:[1,6],top:[3,11],torch:[3,9,11],torchdataset:9,toward:5,tp:11,tpr:8,tqdm:2,tr_iter_per_poch:11,tr_prev:[5,8,11],tradition:1,train:[0,1,3,4,5,6,8,10,11],train_path:[0,10],train_prev:[5,8],train_prop:10,train_siz:10,train_val_split:11,training_help:11,training_preval:5,training_s:5,transact:[3,11],transform:[0,9,10],transfus:0,trivial:3,true_prev:[1,5,8],true_preval:6,turn:4,tweet:[0,3,10,11],twitter:[6,10],twitter_sentiment_datasets_test:0,twitter_sentiment_datasets_train:0,two:[0,1,3,4,5,8],type:[0,3,9],typic:[1,4,5],uci:6,unabl:0,unadjust:5,unbias:5,uncompress:0,under:1,underestim:5,unfortun:5,unifi:0,uniform_prevalence_sampl:8,uniform_sampl:10,uniform_sampling_index:10,uniform_simplex_sampl:8,uniformli:8,union:[8,11],uniqu:10,unit:0,unix:0,unk:10,unless:11,unlik:[1,4],unus:[8,11],up:[3,4,8,11],updat:9,url:8,us:[0,1,3,4,5,6,8,10,11],user:[0,1,5],utf:10,util:[7,9],v:[3,11],va_iter_per_poch:11,val:[0,10],val_split:[3,4,8,9,11],valid:[0,1,3,4,5,8,10,11],valid_loss:[3,9],valid_polici:11,valu:[0,1,3,8,9,10,11],variabl:[1,3,5,8],varianc:[0,5],variant:[5,6,11],varieti:4,variou:[1,5],vector:[0,10],verbos:[0,1,4,8,9,10,11],veri:[3,5],versatil:6,version:2,vertical_xtick:8,via:[0,2,3,11],view:5,visual:[5,6],vocab_s:9,vocabulari:10,vocabulary_s:[3,9,10],vs:3,w:[0,3,10,11],wa:[0,3,5,10,11],wai:[1,11],want:[3,4],warn:10,wb:[0,10],wdbc:0,we:[0,1,3,4,5,6],weight:10,weight_decai:9,well:[0,3,4,5,9],were:0,what:3,when:[0,1,3,4,5,8],whenev:[5,8],where:[3,5,8,10,11],wherebi:4,whether:[8,10,11],which:[0,1,3,4,5,8,10,11],white:0,whole:[0,1,3,4,8],why:3,wide:5,wiki:[0,3],wine:0,within:[8,9,11],without:[1,3,8],word:[1,3,6,10],work:[1,3,4,5,9],worker:1,wors:[4,5],would:[0,1,3,5,6,8,11],wrapper:8,written:6,www:[],x:[5,8,9,10,11],xavier_uniform:9,xlrd:[0,2],xy:10,y:[5,9,10,11],y_:11,y_pred:8,y_true:8,ye:10,yeast:0,yield:[5,8],you:[2,3],your:3,z:0,zero:0,zfthyovrzwxmgfzylqw_y8cagg:[],zip:[0,5]},titles:["Datasets","Evaluation","Installation","Quantification Methods","Model Selection","Plotting","Welcome to QuaPy\u2019s documentation!","quapy","quapy package","quapy.classification package","quapy.data package","quapy.method 
package"],titleterms:{"function":8,A:6,The:3,ad:0,aggreg:[3,11],base:[10,11],bia:5,classif:[4,9],classifi:3,content:[6,8,9,10,11],count:3,custom:0,data:[0,10],dataset:[0,10],diagon:5,distanc:3,document:6,drift:5,emq:3,ensembl:3,error:[1,5,8],evalu:[1,8],ex:[],exampl:6,expect:3,explicit:3,featur:6,get:[],hdy:3,helling:3,indic:6,instal:2,introduct:6,issu:0,learn:0,loss:[2,3,4],machin:0,maxim:3,measur:1,meta:[3,11],method:[3,9,11],minim:3,model:[3,4],model_select:8,modul:[8,9,10,11],network:3,neural:[3,9,11],non_aggreg:11,orient:[2,4],packag:[8,9,10,11],perf:2,plot:[5,8],preprocess:10,process:0,protocol:1,quanet:3,quantif:[2,3,4,5],quapi:[6,7,8,9,10,11],quick:6,reader:10,readm:[],requir:2,review:0,s:6,select:4,sentiment:0,start:[],submodul:[8,9,10,11],subpackag:8,svm:2,svmperf:9,tabl:6,target:4,test:[],test_bas:[],test_dataset:[],test_method:[],titl:[],twitter:0,uci:0,util:8,variant:3,welcom:6,y:3}}) \ No newline at end of file +Search.setIndex({docnames:["Datasets","Evaluation","Installation","Methods","Model-Selection","Plotting","index","modules","quapy","quapy.classification","quapy.data","quapy.method"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["Datasets.md","Evaluation.md","Installation.rst","Methods.md","Model-Selection.md","Plotting.md","index.rst","modules.rst","quapy.rst","quapy.classification.rst","quapy.data.rst","quapy.method.rst"],objects:{"":{quapy:[8,0,0,"-"]},"quapy.classification":{methods:[9,0,0,"-"],neural:[9,0,0,"-"],svmperf:[9,0,0,"-"]},"quapy.classification.methods":{LowRankLogisticRegression:[9,1,1,""]},"quapy.classification.methods.LowRankLogisticRegression":{fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural":{CNNnet:[9,1,1,""],LSTMnet:[9,1,1,""],NeuralClassifierTrainer:[9,1,1,""],TextClassifierNet:[9,1,1,""],TorchDataset:[9,1,1,""]},"quapy.classification.neural.CNNnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.LSTMnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.NeuralClassifierTrainer":{device:[9,3,1,""],fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],reset_net_params:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural.TextClassifierNet":{dimensions:[9,2,1,""],document_embedding:[9,2,1,""],forward:[9,2,1,""],get_params:[9,2,1,""],predict_proba:[9,2,1,""],vocabulary_size:[9,3,1,""],xavier_uniform:[9,2,1,""]},"quapy.classification.neural.TorchDataset":{asDataloader:[9,2,1,""]},"quapy.classification.svmperf":{SVMperf:[9,1,1,""]},"quapy.classification.svmperf.SVMperf":{decision_function:[9,2,1,""],fit:[9,2,1,""],predict:[9,2,1,""],set_params:[9,2,1,""],valid_losses:[9,4,1,""]},"quapy.data":{base:[10,0,0,"-"],datasets:[10,0,0,"-"],preprocessing:[10,0,0,"-"],reader:[10,0,0,"-"]},"quapy.data.base":{Dataset:[10,1,1,""],LabelledCollection:[10,1,1,""],isbinary:[10,5,1,""]},"quapy.data.base.Dataset":{SplitStratified:[10,2,1,""],binary:[10,3,1,""],classes_:[10,3,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],stats:[10,2,1,""],vocabulary_size:[10,3,1,""]},"quapy.data.base.LabelledCollection":{Xy:[10,3,1,""],artificial_sampling_g
enerator:[10,2,1,""],artificial_sampling_index_generator:[10,2,1,""],binary:[10,3,1,""],counts:[10,2,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],natural_sampling_generator:[10,2,1,""],natural_sampling_index_generator:[10,2,1,""],prevalence:[10,2,1,""],sampling:[10,2,1,""],sampling_from_index:[10,2,1,""],sampling_index:[10,2,1,""],split_stratified:[10,2,1,""],stats:[10,2,1,""],uniform_sampling:[10,2,1,""],uniform_sampling_index:[10,2,1,""]},"quapy.data.datasets":{df_replace:[10,5,1,""],fetch_UCIDataset:[10,5,1,""],fetch_UCILabelledCollection:[10,5,1,""],fetch_reviews:[10,5,1,""],fetch_twitter:[10,5,1,""],warn:[10,5,1,""]},"quapy.data.preprocessing":{IndexTransformer:[10,1,1,""],index:[10,5,1,""],reduce_columns:[10,5,1,""],standardize:[10,5,1,""],text2tfidf:[10,5,1,""]},"quapy.data.preprocessing.IndexTransformer":{add_word:[10,2,1,""],fit:[10,2,1,""],fit_transform:[10,2,1,""],index:[10,2,1,""],transform:[10,2,1,""],vocabulary_size:[10,2,1,""]},"quapy.data.reader":{binarize:[10,5,1,""],from_csv:[10,5,1,""],from_sparse:[10,5,1,""],from_text:[10,5,1,""],reindex_labels:[10,5,1,""]},"quapy.error":{absolute_error:[8,5,1,""],acc_error:[8,5,1,""],acce:[8,5,1,""],ae:[8,5,1,""],f1_error:[8,5,1,""],f1e:[8,5,1,""],from_name:[8,5,1,""],kld:[8,5,1,""],mae:[8,5,1,""],mean_absolute_error:[8,5,1,""],mean_relative_absolute_error:[8,5,1,""],mkld:[8,5,1,""],mnkld:[8,5,1,""],mrae:[8,5,1,""],mse:[8,5,1,""],nkld:[8,5,1,""],rae:[8,5,1,""],relative_absolute_error:[8,5,1,""],se:[8,5,1,""],smooth:[8,5,1,""]},"quapy.evaluation":{artificial_prevalence_prediction:[8,5,1,""],artificial_prevalence_protocol:[8,5,1,""],artificial_prevalence_report:[8,5,1,""],evaluate:[8,5,1,""],gen_prevalence_prediction:[8,5,1,""],natural_prevalence_prediction:[8,5,1,""],natural_prevalence_protocol:[8,5,1,""],natural_prevalence_report:[8,5,1,""]},"quapy.functional":{HellingerDistance:[8,5,1,""],adjusted_quantification:[8,5,1,""],artificial_prevalence_sampling:[8,5,1,""],get_nprevpoints_approximation:[8,5,1,""],normalize_prevalence:[8,5,1,""],num_prevalence_combinations:[8,5,1,""],prevalence_from_labels:[8,5,1,""],prevalence_from_probabilities:[8,5,1,""],prevalence_linspace:[8,5,1,""],strprev:[8,5,1,""],uniform_prevalence_sampling:[8,5,1,""],uniform_simplex_sampling:[8,5,1,""]},"quapy.method":{aggregative:[11,0,0,"-"],base:[11,0,0,"-"],meta:[11,0,0,"-"],neural:[11,0,0,"-"],non_aggregative:[11,0,0,"-"]},"quapy.method.aggregative":{ACC:[11,1,1,""],AdjustedClassifyAndCount:[11,4,1,""],AggregativeProbabilisticQuantifier:[11,1,1,""],AggregativeQuantifier:[11,1,1,""],CC:[11,1,1,""],ClassifyAndCount:[11,4,1,""],ELM:[11,1,1,""],EMQ:[11,1,1,""],ExpectationMaximizationQuantifier:[11,4,1,""],ExplicitLossMinimisation:[11,4,1,""],HDy:[11,1,1,""],HellingerDistanceY:[11,4,1,""],MAX:[11,1,1,""],MS2:[11,1,1,""],MS:[11,1,1,""],MedianSweep2:[11,4,1,""],MedianSweep:[11,4,1,""],OneVsAll:[11,1,1,""],PACC:[11,1,1,""],PCC:[11,1,1,""],ProbabilisticAdjustedClassifyAndCount:[11,4,1,""],ProbabilisticClassifyAndCount:[11,4,1,""],SVMAE:[11,1,1,""],SVMKLD:[11,1,1,""],SVMNKLD:[11,1,1,""],SVMQ:[11,1,1,""],SVMRAE:[11,1,1,""],T50:[11,1,1,""],ThresholdOptimization:[11,1,1,""],X:[11,1,1,""],training_helper:[11,5,1,""]},"quapy.method.aggregative.ACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""],solve_adjustment:[11,2,1,""]},"quapy.method.aggregative.AggregativeProbabilisticQuantifier":{posterior_probabilities:[11,2,1,""],predict_proba:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.Aggreg
ativeQuantifier":{aggregate:[11,2,1,""],aggregative:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],learner:[11,3,1,""],n_classes:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.CC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ELM":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.EMQ":{EM:[11,2,1,""],EPSILON:[11,4,1,""],MAX_ITER:[11,4,1,""],aggregate:[11,2,1,""],fit:[11,2,1,""],predict_proba:[11,2,1,""]},"quapy.method.aggregative.HDy":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.MS":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.MS2":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.OneVsAll":{aggregate:[11,2,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],posterior_probabilities:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.PACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.PCC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ThresholdOptimization":{aggregate:[11,2,1,""],compute_fpr:[11,2,1,""],compute_table:[11,2,1,""],compute_tpr:[11,2,1,""],fit:[11,2,1,""],optimize_threshold:[11,2,1,""]},"quapy.method.base":{BaseQuantifier:[11,1,1,""],BinaryQuantifier:[11,1,1,""],isaggregative:[11,5,1,""],isbinary:[11,5,1,""],isprobabilistic:[11,5,1,""]},"quapy.method.base.BaseQuantifier":{aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.base.BinaryQuantifier":{binary:[11,3,1,""]},"quapy.method.meta":{EACC:[11,5,1,""],ECC:[11,5,1,""],EEMQ:[11,5,1,""],EHDy:[11,5,1,""],EPACC:[11,5,1,""],Ensemble:[11,1,1,""],ensembleFactory:[11,5,1,""],get_probability_distribution:[11,5,1,""]},"quapy.method.meta.Ensemble":{VALID_POLICIES:[11,4,1,""],accuracy_policy:[11,2,1,""],aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],ds_policy:[11,2,1,""],ds_policy_get_posteriors:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],ptr_policy:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""],sout:[11,2,1,""]},"quapy.method.neural":{QuaNetModule:[11,1,1,""],QuaNetTrainer:[11,1,1,""],mae_loss:[11,5,1,""]},"quapy.method.neural.QuaNetModule":{device:[11,3,1,""],forward:[11,2,1,""],init_hidden:[11,2,1,""]},"quapy.method.neural.QuaNetTrainer":{classes_:[11,3,1,""],clean_checkpoint:[11,2,1,""],clean_checkpoint_dir:[11,2,1,""],epoch:[11,2,1,""],fit:[11,2,1,""],get_aggregative_estims:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.non_aggregative":{MaximumLikelihoodPrevalenceEstimation:[11,1,1,""]},"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation":{classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.model_selection":{GridSearchQ:[8,1,1,""]},"quapy.model_selection.GridSearchQ":{best_model:[8,2,1,""],classes_:[8,3,1,""],fit:[8,2,1,""],get_params:[8,2,1,""],quantify:[8,2,1,""],set_params:[8,2,1,""]},"quapy.plot":{binary_bias_bins:[8,5,1,""],binary_bias_global:[8,5,1,""],binary_diagonal:[8,5,1,""],error_by_drift:[8,5,1,""],save_or_show:[8,5,1,""]},"quapy.util":{EarlyStop:[8,1,1,""],create_if_not_exist:[8,5,1,""],create_parent_dir:[8,5,1,""],download_file:[8,5,1,""],download_file_if_not_
exists:[8,5,1,""],get_quapy_home:[8,5,1,""],map_parallel:[8,5,1,""],parallel:[8,5,1,""],pickled_resource:[8,5,1,""],save_text_file:[8,5,1,""],temp_seed:[8,5,1,""]},quapy:{classification:[9,0,0,"-"],data:[10,0,0,"-"],error:[8,0,0,"-"],evaluation:[8,0,0,"-"],functional:[8,0,0,"-"],isbinary:[8,5,1,""],method:[11,0,0,"-"],model_selection:[8,0,0,"-"],plot:[8,0,0,"-"],util:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","property","Python property"],"4":["py","attribute","Python attribute"],"5":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:property","4":"py:attribute","5":"py:function"},terms:{"0":[0,1,3,4,5,8,9,10,11],"00":[0,1,4,8],"000":1,"0001":[4,11],"000e":1,"001":[4,9,11],"009":1,"01":[8,9,11],"017":1,"018":0,"02":1,"021":0,"02552":4,"03":1,"034":1,"035":1,"037":1,"04":1,"041":1,"042":1,"046":1,"048":1,"05":[5,8],"055":1,"063":0,"065":0,"070":1,"073":1,"075":1,"078":0,"081":0,"082":[0,1],"083":0,"086":0,"091":1,"099":0,"1":[0,1,3,4,5,8,9,10,11],"10":[0,1,4,5,8,9,11],"100":[0,1,3,4,5,9,10,11],"1000":[0,4,11],"10000":4,"100000":4,"101":[4,10],"1010":4,"1024":11,"104":0,"108":1,"109":0,"11":[0,1,6],"11338":0,"114":1,"1145":[],"12":9,"120":0,"1215742":0,"1271":0,"13":[0,9],"139":0,"14":[3,11],"142":1,"146":[3,11],"1473":0,"148":0,"1484":0,"15":[3,8,11],"150":0,"153":0,"157":0,"158":0,"159":0,"1593":0,"1594":0,"1599":0,"161":0,"163":[0,1],"164":[0,3,11],"167":0,"17":0,"1771":1,"1775":[0,3],"1778":[0,3],"178":0,"1823":0,"1839":0,"18399":0,"1853":0,"19":[3,10,11],"193":0,"199151":0,"19982":4,"1e":9,"1st":0,"2":[0,1,3,5,8,10,11],"20":[5,8,11],"200":[1,9],"2000":0,"2002":[3,11],"2011":4,"2013":[3,11],"2015":[0,2,3,9,11],"2016":[3,10,11],"2017":[0,3,11],"2018":[0,3,10],"2019":[3,11],"2020":4,"20342":4,"206":0,"207":0,"208":0,"21":[1,3,5,8,11],"210":8,"211":0,"2126":0,"2155":0,"21591":0,"218":[3,11],"2184":0,"219e":1,"22":[0,3,9,10,11],"222":0,"222046":0,"226":0,"229":1,"229399":0,"23":9,"235":1,"238":0,"2390":0,"24":[0,9],"243":0,"248563":0,"24866":4,"24987":4,"25":[0,5,8,9,11],"25000":0,"256":[0,9],"26":9,"261":0,"265":0,"266":0,"267":0,"27":[1,3,9,11],"270":0,"2700406":[],"271":0,"272":0,"274":0,"275":1,"27th":[0,3,10],"28":3,"280":0,"281":0,"282":0,"283":[0,1],"288":0,"289":0,"2971":0,"2nd":0,"2t":1,"2x5fcv":0,"3":[0,1,3,5,6,8,9,10,11],"30":[0,1,3,11],"300":[0,1,9],"305":0,"306":0,"312":0,"32":[0,6],"33":[0,5],"331":0,"333":0,"335":0,"337":0,"34":[0,3,11],"341":0,"346":1,"347":0,"350":0,"351":0,"357":1,"359":0,"361":0,"366":1,"372":0,"373":0,"376132":0,"3765":0,"3813":0,"3821":0,"383e":1,"387e":1,"392":0,"394":0,"399":0,"3f":[1,6],"3rd":0,"4":[0,1,3,4,5,8,11],"40":[0,3,4,11],"404333":0,"407":0,"41":[3,11],"412":0,"412e":1,"413":0,"414":0,"417":0,"41734":4,"42":[1,8],"421":0,"4259":0,"426e":1,"427":0,"430":0,"434":0,"435":1,"43676":4,"437":0,"44":0,"446":0,"45":[3,5,11],"452":0,"459":1,"4601":0,"461":0,"463":0,"465":0,"466":0,"470":0,"48":[3,11],"481":0,"48135":4,"486":0,"4898":0,"492":0,"496":0,"4960":1,"497":0,"5":[0,1,3,4,5,8,9,10,11],"50":[0,5,8,11],"500":[0,1,4,5,11],"5000":[1,5],"5005":4,"507":0,"508":0,"512":[9,11],"514":0,"515e":1,"530":0,"534":0,"535":0,"535e":1,"5379":4,"539":0,"541":1,"546":0,"5473":0,"54it":4,"55":5,"55it":4,"565":1,"569":0,"57":0,"573":0,"578":1,"583":0,"591":[3,11],"5f":4,"5fcv":11,"6":[0,1,3,5,8,10,11],"60":0,"600":1,"601":0,"604":[3,11],"606":0,"625":0,"627":0,"633e":1,"634":1,"64":[9,11],"640":0,"641":0,"650":0,"653":0
,"654":1,"66":[1,11],"665":0,"667":0,"669":0,"67":5,"683":0,"688":0,"691":0,"694582":0,"7":[1,5,9],"70":0,"700":0,"701e":1,"711":0,"717":1,"725":1,"730":0,"735":0,"740e":1,"748":0,"75":[0,5,8],"762":0,"774":0,"778":0,"787":0,"794":0,"798":0,"8":[0,1,5,10,11],"8000":0,"830":0,"837":1,"858":1,"861":0,"87":[0,3,11],"8788":0,"889504":0,"8d2fhsgcvn0aaaaa":[],"9":[0,1,3,5,11],"90":[5,8],"901":0,"909":1,"914":1,"917":0,"919":0,"922":0,"923":0,"935":0,"936":0,"937":0,"945":1,"95":8,"9533":0,"958":0,"97":0,"979":0,"982":0,"99":8,"abstract":[3,9,11],"case":[0,1,3,4,5,8,11],"class":[0,1,3,4,5,6,8,9,10,11],"d\u00edez":[3,11],"default":[1,3,8,9,10],"do":[0,1,3,4,8,9],"final":[1,3,5],"float":[0,3,8,9,10,11],"function":[0,1,3,4,5,6,7,9,11],"g\u00e1llego":[0,3,11],"gonz\u00e1lez":[3,11],"import":[0,1,3,4,5,6],"int":[0,5,8,10,11],"long":[4,9],"new":[0,3,10,11],"p\u00e9rez":[0,3,11],"return":[0,1,3,4,5,8,9,10,11],"rodr\u0131":[3,11],"short":9,"static":[3,11],"true":[0,1,3,4,5,6,8,9,10,11],"try":4,"while":[3,5,8,9,11],A:[0,3,8,9,10,11],As:[3,4],By:[1,3,8],For:[0,1,5,6,8,11],If:[3,5,8,11],In:[0,1,2,3,4,5,6,9,11],It:[3,4,5],One:[0,1,3,11],That:[1,4],The:[0,1,2,4,5,6,8,9,10,11],Then:3,These:0,To:[5,10],_:5,__:[],__class__:5,__name__:5,_adjust:[],_ae_:[],_classify_:11,_error_name_:11,_fit_learner_:11,_kld_:[],_labelledcollection_:11,_learner_:11,_mean:[],_min_df_:10,_my:[],_nkld_:[],_posterior_probabilities_:11,_q_:[],_rae_:[],_svmperf_:[],ab:[],aboud:3,about:[0,5],abov:[0,3,5],absolut:[1,3,5,6],absolute_error:8,abstractmethod:3,acc:[1,3,5,6,8,11],acc_error:8,accept:3,access:[0,3],accommod:0,accord:[1,3,4,8,9],accordingli:5,accuraci:[1,5],accuracy_polici:11,achiev:[1,3,4,5],acm:[0,3,10,11],across:[0,1,4,5,6],action:[0,11],acut:0,ad:6,add:[3,4,8],add_word:10,addit:3,addition:[0,11],adjust:[3,6,11],adjusted_quantif:8,adjustedclassifyandcount:11,adopt:[3,4],advanc:[0,6],advantag:3,ae:[1,2,5,8],ae_:1,affect:8,afterward:11,again:5,against:5,aggreg:[1,4,5,6,7,8],aggregativeprobabilisticquantifi:[3,11],aggregativequantifi:[3,11],aggregg:11,aim:[4,5],al:[0,2,9],alaiz:[3,11],alegr:[3,11],alejandro:4,alia:[3,11],all:[0,1,2,3,5,8,11],allia:3,alloc:9,allow:[0,1,2,3,5,8,9,10,11],almost:3,along:[0,3,11],alreadi:[3,11],also:[0,1,2,3,5,6,9],altern:4,although:[3,4,5,11],alwai:[3,4,5],among:3,an:[0,1,2,3,4,5,6,8,9,11],analys:[5,6],analysi:[0,3,6,10,11],analyz:5,ani:[0,1,3,4,5,6,8,9,10,11],anoth:[0,1,3,5],anyon:0,api:6,app:8,appeal:1,appear:5,append:5,appli:[2,3,4,5,8,9,10],appropri:4,approxim:[1,5,9,11],ar:[0,1,3,4,5,8,9,10,11],archive_filenam:8,archive_path:8,arg:[8,10,11],args_i:8,argu:4,argument:[0,1,3,5],arifici:8,aris:1,around:1,arrai:[1,3,5,8,9,10],articl:[3,4,11],artifici:[0,1,3,4,5,6,8],artificial_prevalence_predict:8,artificial_prevalence_protocol:8,artificial_prevalence_report:8,artificial_prevalence_sampl:8,artificial_sampling_ev:[1,4],artificial_sampling_gener:[0,10],artificial_sampling_index_gener:10,artificial_sampling_predict:[1,5],artificial_sampling_report:1,arxiv:4,asarrai:1,asdataload:9,asonam:0,assess:4,assign:[3,8],associ:10,assum:[1,6,11],assumpt:[1,5,6],astyp:10,attempt:3,attribut:11,august:0,autom:[0,3,6],automat:[0,1],av:[3,11],avail:[0,1,2,3,5,6,9],averag:[1,3],avoid:1,axi:5,b:[0,10],balanc:[0,4],band:5,bar:8,barranquero:[2,3,9,11],base:[0,3,6,7,8,9],base_classifi:5,base_estim:3,base_quantifier_class:11,baseestim:[9,11],baselin:6,basequantifi:[3,8,11],basic:[5,11],batch:9,batch_siz:9,batch_size_test:9,been:[0,3,4,5,10,11],befor:[3,9,11],behav:[3,5],being:[4,8],belief:1,belong:3,below:[0,2,3,5,10],best:[4,
8,9,11],best_model:8,best_model_:4,best_params_:4,better:4,between:[4,5,6,9],beyond:5,bia:6,bias:5,bidirect:11,bin:[5,11],bin_bia:5,bin_diag:5,binar:[8,10],binari:[3,5,6,9,10,11],binary_bias_bin:[5,8],binary_bias_glob:[5,8],binary_diagon:[5,8],binary_quantifi:11,binaryquantifi:11,block:0,bool:[8,11],both:5,bound:8,box:5,breast:0,brief:1,broken:5,budg:1,budget:[1,4],build:11,bypass:11,c:[3,4,9,10,11],calibr:3,calibratedclassifi:3,calibratedclassifiercv:3,calibratedcv:11,call:[0,1,5,8,11],callabl:[0,8,10],can:[0,1,2,3,4,5,8],cancer:0,cannot:11,cardiotocographi:0,care:11,carri:3,casa_token:[],castano:[3,11],castro:[3,11],categor:3,categori:1,cc:[3,5,11],ceil:8,center:5,chang:[0,1,3,11],character:[3,6],characteriz:[0,3,11],charg:[0,8],check:[3,4],checkpoint:[9,11],checkpointdir:11,checkpointnam:11,checkpointpath:9,choic:4,chosen:[4,8],cl:0,class2int:10,class_weight:4,classes_:[8,10,11],classif:[0,1,3,7,8,10,11],classif_posterior:[3,11],classif_predict:[3,11],classif_predictions_bin:11,classifi:[1,4,5,6,9,11],classifier_net:9,classifiermixin:9,classifyandcount:[3,11],classmethod:[0,10,11],classnam:10,clean_checkpoint:11,clean_checkpoint_dir:11,clear:5,clearer:1,clearli:5,clip:8,close:1,closer:1,cmc:0,cnn:3,cnnnet:[3,9],code:[0,3,4,5,9],coincid:[0,6],col:[0,10],collect:[0,8,9,10],collet:10,color:[5,8],colormap:8,column:[0,10],com:[],combin:[0,1,4,8],combinatio:8,combinations_budget:8,come:0,commandlin:[],common:11,commonli:6,compar:[5,11],comparison:5,compil:[2,3],complet:[3,5],compon:9,compress:0,comput:[1,3,5,8,11],computation:4,compute_fpr:11,compute_t:11,compute_tpr:11,concept:6,concur:11,conduct:0,confer:[0,3,10],configur:[4,8],consecut:9,consid:[3,5,9,10],consist:[0,4,5,9,10],constrain:[1,5],constructor:3,consult:[0,1],contain:[1,2,3,5,8,9,10,11],contanin:8,content:7,context:8,contrast:1,control:[1,4],conv_block:[],conv_lay:[],convert:[1,3,9],convolut:9,copi:10,cornel:[],correct:11,correspond:[5,10],cost:1,costli:4,could:[0,1,3,4,5,6,11],count:[4,5,6,10,11],count_:[],counter:10,countvector:10,covari:10,cover:[1,4,9],coz:[0,3,11],cpu:[1,9],creat:[0,6,8],create_if_not_exist:8,create_parent_dir:8,crisp:3,criteria:4,cross:[3,11],cs:[],csr_matrix:10,csv:10,ctg:0,cuda:[3,9,11],cumbersom:1,curios:5,current:[3,8,9,10],custom:[3,6,8],customarili:[3,4],cv:[3,4],cyan:5,dat:[0,9],data:[1,3,4,5,6,7,8,9,11],data_hom:10,datafram:1,dataload:9,dataset:[1,3,4,5,6,7,8,9,11],dataset_nam:10,deal:0,decaesteck:[3,11],decai:9,decim:1,decis:[3,9],decision_funct:9,decomposit:9,dedic:1,deep:[3,8,11],def:[0,1,3,5,8],defin:[0,3,8,9,11],degre:4,del:[0,3,11],delai:8,deliv:3,dens:0,depend:[0,1,4,5,8],describ:[3,11],descript:0,design:4,desir:[0,1],despit:1,detail:[0,1,3,6,9,11],determin:[1,4,5],detriment:5,devel:10,develop:[4,6],deviat:[0,1,5],devic:[0,3,5,9,11],df:[1,10],df_replac:10,diabet:0,diagon:6,dict:[8,10,11],dictionari:[8,9],differ:[0,1,3,4,5,6,8,10],difficult:5,digit:0,dimens:[8,9,10],dimension:[8,9,10],directli:[0,1,3],directori:[2,9,10],discoveri:[3,11],discuss:5,disjoint:9,displai:[1,5],distanc:11,distant:[1,8],distribut:[0,3,5,8,11],diverg:[1,3],dl:[],doabl:0,doc_embed:11,doc_embedding_s:11,doc_posterior:11,document:[0,1,3,5,9,10,11],document_embed:9,doe:[0,2,3,8],doi:[],done:3,dot:5,down:5,download:[0,2,3],download_fil:8,download_file_if_not_exist:8,drawn:[0,1,4],drift:6,drop:[9,11],drop_p:9,dropout:9,ds:[3,11],ds_polici:11,ds_policy_get_posterior:11,dtype:1,dump:10,dure:[1,5],dynam:[3,9,11],e:[0,1,3,4,5,6,8,9,10,11],eacc:11,each:[0,1,3,4,5,8,9,10,11],earli:9,early_stop:11,earlystop:8,easili:[0,2,5,9],ecc:11
,edu:[],eemq:11,effect:3,effici:3,ehdi:11,either:[1,3,8,11],element:3,elm:[3,11],em:11,emb:9,embed:[3,9],embed_s:9,embedding_s:9,empti:10,emq:[5,11],enabl:9,encod:10,end:[4,8],endeavour:6,enough:5,ensembl:[0,6,11],ensemblefactori:11,ensure_probabilist:11,entir:[0,3,4,5],environ:[1,3,4,5,8],ep:[1,8],epacc:11,epoch:[9,11],epsilon:[1,11],equal:[1,8],equidist:[0,8],equip:[3,5],err:8,err_drift:5,err_nam:8,error:[3,4,6,7,9],error_:[],error_by_drift:[5,8],error_funct:1,error_metr:[1,4,8],error_nam:[5,8,11],establish:8,estim:[1,3,5,6,8,9,11],estim_prev:[1,5,8],estim_preval:[3,6],esuli:[0,2,3,9,10,11],et:[0,2,9],etc:6,eval_budget:[4,8],evalu:[0,3,4,5,6,7,9],eventu:9,everi:[3,11],everyth:3,evinc:5,ex:[],exact:0,exactli:0,exampl:[0,1,3,4,5,8,9,11],exce:8,excel:0,except:[3,8],exemplifi:0,exhibit:[4,5],exist:8,expand_frame_repr:1,expect:6,expectationmaximizationquantifi:[3,11],experi:[1,2,3,4,5,8],explain:[1,5],explicitlossminim:11,explicitlossminimis:11,explor:[4,8],express:10,ext:2,extend:[2,3,11],extens:[0,2,5],extern:3,extract:[1,8],f1:[1,9],f1_error:8,f1e:[1,8],f:[0,1,3,4,5,6,10,11],fabrizio:4,facilit:6,fact:[3,5],fals:[1,3,5,8,9,10,11],famili:3,familiar:3,far:9,fast:8,faster:[0,10],feat1:10,feat2:10,featn:10,featur:0,feature_extract:10,fetch:[0,6],fetch_review:[0,1,3,4,5,10],fetch_twitt:[0,3,6,10],fetch_ucidataset:[0,3,10],fetch_ucilabelledcollect:[0,10],ff_layer:11,fhe:0,file:[0,5,9,10],fin:0,find:[0,4],finish:4,first:[0,1,2,3,5,8,10,11],fit:[1,3,4,5,6,8,9,10,11],fit_learn:[3,11],fit_transform:10,fix:[1,4],float64:1,fold:[3,11],folder:0,follow:[0,1,3,4,5,6],fomart:10,for_model_select:[0,10],form:0,format:[0,5,10],former:[2,11],forward:[9,11],found:[0,3,4,9],four:3,fp:11,fpr:8,framework:6,frequenc:0,from:[0,1,3,4,5,6,8,10,11],from_csv:10,from_nam:[1,8],from_spars:10,from_text:10,full:1,fulli:0,func:8,further:[0,1,3,9],fusion:[0,3,11],futur:3,g:[0,1,3,4,6,8,10,11],gao:[0,3,10,11],gasp:[0,10],gen:8,gen_data:5,gen_fn:8,gen_prevalence_predict:8,gener:[0,1,3,4,5,8,9,10,11],generation_func:8,german:0,get:[0,1,5,8,9],get_aggregative_estim:11,get_nprevpoints_approxim:[1,8],get_param:[3,8,9,11],get_probability_distribut:11,get_quapy_hom:8,github:[],given:[1,3,4,9,11],goe:4,good:[4,5],got:4,govern:1,gpu:9,grant:11,grid:[4,8,11],gridsearchcv:4,gridsearchq:[4,8],group:3,guarante:11,guez:[3,11],gzip:0,ha:[3,4,5,9],haberman:[0,3],handl:0,happen:[4,5],hard:3,harder:5,harri:0,have:[0,1,2,3,4,5,10,11],hcr:[0,3,10],hdy:[6,11],held:[3,4,9],helling:11,hellingerdist:8,hellingerdistancei:[3,11],help:5,here:1,hidden:[5,9],hidden_s:9,hide:5,high:5,higher:[1,5],hlt:[],hold:6,home:10,hook:11,how:[0,1,3,4,5,11],howev:[0,4,5,11],hp:[0,3,4,10],html:[],http:[],hyper:[4,8,9],hyperparam:4,hyperparamet:[3,8,11],i:[0,1,3,4,5,8,9,10,11],id:[0,3,10],idf:0,ieee:0,ignor:[8,10,11],iid:[1,5,6],illustr:[3,4,5],imdb:[0,5,10],implement:[0,1,3,4,5,6,9,11],impos:4,improv:[3,9],includ:[0,1,3,5,6],inde:[3,4],index:[0,3,6,9,10],indextransform:10,indic:[0,1,3,4,5,8,10,11],individu:[1,3],infer:0,inform:[0,1,3,4,8,10,11],infrequ:10,inherit:3,init:3,init_hidden:11,initi:[0,9],inplac:[1,3,10],input:[3,5,8,9],insight:5,inspir:3,instal:[0,3,6,9],instanc:[0,3,4,5,6,8,9,10,11],instanti:[0,1,3,4,9],instead:[1,3,4,11],integ:[3,9,10],integr:6,interest:[1,5,6],interestingli:5,interfac:[0,1],intern:[0,3,10],interpret:[5,6],interv:[1,5,8],introduc:1,invok:[0,1,3,8,10],involv:[2,5],io:[],ionospher:0,iri:0,irrespect:5,isaggreg:11,isbinari:[8,10,11],isometr:5,isprobabilist:11,isti:[],item:8,iter:[0,8,11],its:[3,4,9],itself:[3,11],j:[0,3,11],joachim:[3,9],job:[2,
8],joblib:2,just:[1,3],k:[3,6,11],kei:8,kept:10,kernel:9,kernel_height:9,kfcv:[0,10,11],kindl:[0,1,3,5,10],kld:[1,2,8,9],know:3,knowledg:[0,3,10,11],known:[0,3,4],kullback:[1,3],kwarg:[9,10,11],l1:11,label:[0,3,4,5,6,8,9,10,11],labelledcollect:[0,3,4,8,10,11],larg:4,largest:8,last:[1,3,5,9],lastli:3,latex:5,latinn:[3,11],latter:11,layer:[3,9],lead:1,learn:[1,2,3,4,6,8,9,11],learner:[3,4,9,11],least:[0,10],leav:10,legend:8,leibler:[1,3],length:9,less:[8,10],let:[1,3],level:11,leverag:3,like:[0,1,3,5,9],limit:[5,8],line:[1,3],linear:5,linear_model:[1,3,4,6,9],linearsvc:[3,5],linspac:5,list:[0,5,8,9,10],listedcolormap:8,literatur:[0,1,4,6],load:[0,3,8,10],loader:0,loader_func:[0,10],local:8,log:10,logist:[1,3,9,11],logisticregress:[1,3,4,6,9],logscal:8,logspac:4,longer:8,longest:9,look:[0,1,3,5],loss:[6,9,11],low:[5,9],lower:[5,8],lower_is_bett:8,lowest:5,lowranklogisticregress:9,lr:[1,3,9,11],lstm:[3,9],lstm_class_nlay:9,lstm_hidden_s:11,lstm_nlayer:11,lstmnet:9,m:[3,8,11],machin:[1,4,6],made:[0,2,11],mae:[1,4,6,8,9,11],mae_loss:11,main:5,maintain:[3,11],make:[0,1,3],mammograph:0,manag:[0,3,10],mani:[1,3,4,5,6,11],manner:0,manual:0,map:[1,9],map_parallel:8,margin:9,matplotlib:[2,8],matric:[0,5,10],matrix:5,max:11,max_it:11,max_sample_s:11,maxim:6,maximum:[1,8,9],maximumlikelihoodprevalenceestim:11,md:[],mean:[0,1,3,4,5,6,9,10,11],mean_absolute_error:8,mean_relative_absolute_error:8,measur:[2,3,4,5,6,11],mediansweep2:11,mediansweep:11,member:3,memori:9,mention:3,merg:5,meta:[6,7,8],method:[0,1,4,5,6,7,8],method_data:5,method_nam:[5,8],metric:[1,3,4,6,8],might:1,min_df:[1,3,4,5,10],min_po:11,mine:[0,3,11],minim:8,minimum:10,minimun:10,mining6:10,mixtur:3,mkld:[1,8,11],mnkld:[1,8,11],mock:9,modal:4,model:[0,1,5,6,8,9,11],model_select:[4,7],modifi:[3,8],modul:[0,1,3,5,6,7],moment:[0,3],more:[3,5,8],moreo:[0,3,4,10],most:[0,3,5,6,11],movi:0,mrae:[1,6,8,9,11],ms2:11,ms:11,mse:[1,3,6,8,11],msg:11,multiprocess:8,multivari:[3,9,11],must:3,my:[],my_arrai:8,my_custom_load:0,my_data:0,mycustomloss:3,n:[0,1,8,9],n_bin:[5,8],n_class:[1,3,8,9,10,11],n_compon:9,n_dimens:9,n_epoch:11,n_featur:9,n_instanc:9,n_job:[1,3,4,8,10,11],n_preval:[0,8,10],n_prevpoint:[1,4,5,8],n_repeat:[1,8],n_repetit:[1,4,5,8],n_sampl:9,name:[5,8,9,10],nativ:6,natur:[1,8],natural_prevalence_predict:8,natural_prevalence_protocol:8,natural_prevalence_report:8,natural_sampling_gener:10,natural_sampling_index_gener:10,nbin:[5,8],ndarrai:[1,3,8,10,11],necessarili:11,need:[0,3,11],neg:[0,5],nest:[],net:9,network:[0,9,10,11],neural:[0,7,8,10],neuralclassifiertrain:[3,9],neutral:0,next:[4,8,9],nfold:[0,10],nkld:[1,2,6,8,9],nn:[9,11],nogap:10,non:[3,11],non_aggreg:[7,8],none:[1,4,8,9,10,11],nonetheless:4,nor:3,normal:[0,1,3,11],normalize_preval:8,note:[1,3,4,5],now:5,nowadai:3,np:[1,3,4,5,8],npp:8,nprevpoint:8,nrepeat:[0,10],num_prevalence_combin:[1,8],number:[0,1,3,5,8,9,10,11],numer:[0,1,3,6,10],numpi:[2,4,8,9,11],o_l6x_pcf09mdetq4tu7jk98mxfbgsxp9zso14jkuiyudgfg0:[],object:[0,8,9,10,11],observ:1,obtain:[1,4],occur:[5,10],occurr:10,octob:[0,3],off:9,offer:[3,6],older:2,omd:[0,10],ommit:1,onc:[1,3,5,8],one:[0,1,3,4,5,8,11],ones:[1,3,5,8,10],onevsal:[3,11],onli:[0,3,5,8,9,11],open:[0,6],oper:3,opt:4,optim:[2,3,4,8,9,11],optimize_threshold:11,option:[0,1,3,5,8,10,11],order:[0,2,3,5,8,10,11],order_bi:11,org:[],orient:[3,6,8,11],origin:[0,3,10,11],os:0,other:[1,3,5,6,8],otherwis:[0,3,11],our:[],out:[3,4,5,9],outcom:5,outer:8,output:[0,1,3,4,9,11],over:[3,4],overal:1,overestim:5,overrid:3,overridden:[3,11],own:4,p:[0,3,8,11],p_hat:8,pacc:[1,3,5,11]
,packag:[0,2,3,6,7],pad:9,pad_length:9,padding_length:9,page:[0,2,6],pageblock:0,pair:0,panda:[1,2],paper:[0,3,11],parallel:[1,3,8],param:[4,8,9,10,11],param_grid:[4,8,11],param_mod_sel:11,param_model_sel:11,paramet:[1,3,4,8,9,10,11],part:[3,10],particular:[0,1,3],particularli:1,pass:[0,1,5,9,11],past:1,patch:[2,3,9],path:[0,3,5,8,9,10],patienc:[8,9,11],pattern:[3,11],pca:9,pcalr:[],pcc:[3,4,5,11],pd:1,pdf:5,peopl:[],perf:[6,9],perform:[1,3,4,5,6,8,9,11],phonem:0,pick:4,pickl:[3,8,10],pickle_path:8,pickled_resourc:8,pii:[],pip:2,pipelin:[],pkl:8,plai:0,plan:3,pleas:3,plot:[6,7],png:5,point:[0,1,3,8],polici:[3,11],popular:6,portion:4,pos_class:[8,10],posit:[0,3,5],possibl:[1,3,8],posterior:[3,8,9,11],posterior_prob:[3,11],postpon:3,potter:0,pp:[0,3],practic:[0,4],pre:[0,3],prec:[0,8],precis:[0,1],preclassifi:3,predict:[3,4,5,8,9,11],predict_proba:[3,9,11],predictor:1,prepare_svmperf:[2,3],preprint:4,preprocess:[0,1,3,7,8],present:[0,3,10],preserv:[1,5],pretti:5,prev:[0,1,8,10],prevail:3,preval:[0,1,3,4,5,6,8,10,11],prevalence_estim:8,prevalence_from_label:8,prevalence_from_prob:8,prevalence_linspac:8,prevel:11,previou:3,previous:11,prevs_estim:11,prevs_hat:[1,8],princip:9,print:[0,1,3,4,6,9],prior:[1,3,4,5,6],priori:[3,11],probabilist:[3,11],probabilisticadjustedclassifyandcount:11,probabilisticclassifyandcount:11,probabl:[1,3,4,5,6,9,11],problem:[0,3,5,11],procedur:[3,6,11],proceed:[0,3,10],process:[3,4,8],processor:3,procol:1,produc:[0,1,5,8],product:3,progress:8,properli:0,properti:[3,8,9,10,11],proport:[3,4,8,9,11],propos:[2,3,11],protocl:8,protocol:[0,3,4,5,6,8],provid:[0,3,5,6],ptecondestim:11,ptr:[3,11],ptr_polici:11,purpos:[0,11],python:[0,6],pytorch:2,q:[0,2,3,8,9],qacc:9,qdrop_p:11,qf1:9,qgm:9,qp:[0,1,3,4,5,6,8],quanet:[2,6,9,11],quanetmodul:11,quanettrain:11,quantif:[0,1,6,8,9,10,11],quantifi:[3,4,5,6,8,11],quantification_error:8,quantiti:8,quapi:[0,1,2,3,4,5],quapy_data:0,quay_data:10,quevedo:[0,3,11],quick:[],r:[0,3,11],rae:[1,2,8],rais:[3,8],rand:8,random:[1,3,4,5,8],random_se:[1,8],random_st:10,randomli:0,rang:[0,5],rank:[3,9],rare:10,rate:[3,9],rather:[1,4],raw:10,rb:0,re:[3,4,10],read:10,reader:[7,8],readm:[],real:[9,10],reason:[3,5,6],receiv:[0,3,5],recip:11,recognit:[3,11],recommend:[1,5],recurr:[0,3,10],red:0,red_siz:[3,11],reduc:[0,10],reduce_column:[0,10],refer:[9,10],refit:[4,8],regard:4,regist:11,regress:9,regressor:[1,3,11],reindex_label:10,reiniti:9,rel:[1,3],relative_absolute_error:8,reli:[1,3],reliabl:[3,11],rememb:5,remov:10,repeat:[8,10],repetit:8,repl:10,replac:[0,3,10],replic:[1,4,8],report:1,repositori:0,repr_siz:9,repres:[1,3,5,10,11],represent:[0,3],request:[0,8,11],requir:[0,1,3,6,9],reset_net_param:9,resourc:8,respect:[0,1,5,11],respond:3,rest:[10,11],result:[1,2,3,4,5,6,11],retain:[0,3,9],retrain:4,return_constrained_dim:8,reus:[0,3,8],review:[5,6,10],reviews_sentiment_dataset:0,rewrit:5,right:4,role:0,root:6,roughli:0,routin:8,row:10,run:[0,1,2,3,4,5,8,11],s003132031400291x:[],s:[0,1,3,4,5,8,9,10],saeren:[3,11],sai:11,said:3,same:[0,3,5,10],sampl:[0,1,3,4,5,6,8,9,10,11],sample_s:[0,1,3,4,5,8,10,11],sampling_from_index:[0,10],sampling_index:[0,10],sander:[0,10],save:[5,8],save_or_show:8,save_text_fil:8,savepath:[5,8],scall:10,scenario:[1,3,4,5,6],scienc:[3,11],sciencedirect:[],scikit:[2,3,4],scipi:[2,10],score:[0,1,4,9,10],script:[1,2,3,6],se:[1,8],search:[3,4,6,8,11],sebastiani:[0,3,4,10,11],second:[0,1,3,5,8],section:4,see:[0,1,2,3,4,5,6,9],seed:[1,4,8],seem:3,seemingli:5,seen:5,select:[0,3,6,8,11],selector:3,self:[3,9,10,11],semeion:0,semev:0,semeval1
3:[0,10],semeval14:[0,10],semeval15:[0,10],semeval16:[0,6,10],sentenc:10,sentiment:[3,6,10,11],separ:[8,10],seri:0,serv:3,set:[0,1,3,4,5,6,8,9,10,11],set_opt:1,set_param:[3,8,9,11],set_siz:[],sever:0,sh:[2,3],shape:[5,8,9],share:[0,10],shift:[1,4,6,8],shorter:9,shoud:3,should:[0,1,3,4,5,6,9,10,11],show:[0,1,3,4,5,8,9,10],show_std:[5,8],showcas:5,shown:[1,5],shuffl:[9,10],signific:1,silent:[8,11],similar:11,simpl:[0,3,5,11],simplest:3,simplex:[0,8],simpli:[1,2,3,4,5,6,11],sinc:[0,1,3,5,8,11],singl:[1,3,6,11],size:[0,1,3,8,9,10,11],sklearn:[1,3,4,5,6,9,10,11],sld:3,slice:8,smooth:[1,8],smooth_limits_epsilon:8,so:[0,1,3,5,8,9,11],social:[0,3,10,11],soft:3,softwar:0,solid:5,solv:4,solve_adjust:11,some:[0,1,3,5],some_arrai:8,sometim:1,sonar:0,sourc:[2,3,6,9],sout:11,space:[0,4,9],spambas:0,spars:[0,10],special:[0,5,10],specif:[3,4],specifi:[0,1,3,5,8,9,10,11],spectf:0,spectrum:[0,1,4,5],speed:3,split:[0,3,4,5,9,10,11],split_stratifi:10,splitstratifi:10,spmatrix:10,squar:[1,3],sst:[0,10],stabil:1,standard:[0,1,5,10],start:4,stat:10,state:8,statist:[0,1,11],stats_siz:11,std:9,stdout:8,step:[5,8],stop:9,store:[0,9,10],str:[0,8,10],strategi:[3,4],stratifi:[0,3],stride:9,string:[1,8,10],strongli:[4,5],strprev:[0,1,8],structur:3,studi:[0,3,11],subclass:11,subinterv:5,sublinear_tf:10,submit:0,submodul:7,subobject:[],suboptim:4,subpackag:7,subsequ:[10,11],subtract:[0,8],subtyp:10,suffic:5,suffici:11,sum:11,summar:0,supervis:[4,6],support:[3,6,9],surpass:1,svm:[3,5,6,9],svm_light:[],svm_perf:[],svm_perf_classifi:9,svm_perf_learn:9,svm_perf_quantif:[2,3],svmae:[3,11],svmkld:[3,11],svmnkld:[3,11],svmperf:[2,3,7,8],svmperf_bas:[9,11],svmperf_hom:3,svmq:[3,11],svmrae:[3,11],syntax:5,system:4,t50:11,t:[0,1,3],take:[0,3,5,8,11],taken:[3,9],target:[3,5,6,8,9,11],task:[3,4,11],temp_se:8,tempor:8,tend:5,tendenc:5,tensor:9,term:[0,1,3,4,5,6,9,10,11],test:[0,1,3,4,5,6,8,9,10,11],test_bas:[],test_dataset:[],test_method:[],test_path:[0,10],test_sampl:8,test_split:10,text2tfidf:[0,1,3,10],text:[0,3,8,9,10,11],textclassifiernet:9,textual:[0,6,10],tf:[0,10],tfidf:[0,4,5,10],tfidfvector:10,than:[1,4,5,8,9,10],thei:[0,3],them:[0,3,11],theoret:4,thereaft:1,thi:[0,1,2,3,4,5,6,8,9,11],thing:3,third:[1,5],thorsten:9,those:[1,3,4,5,8,9],though:3,three:[0,5],thresholdoptim:11,through:[3,8],thu:[3,4,5,11],tictacto:0,time:[0,1,3,8,10],timeout:8,timeouterror:8,timer:8,titl:8,tj:[],tn:11,token:[0,9,10],tool:[1,6],top:[3,11],torch:[3,9,11],torchdataset:9,toward:5,tp:11,tpr:8,tqdm:2,tr_iter_per_poch:11,tr_prev:[5,8,11],trade:9,tradition:1,train:[0,1,3,4,5,6,8,9,10,11],train_path:[0,10],train_prev:[5,8],train_prop:10,train_siz:10,train_val_split:11,trainer:9,training_help:11,training_preval:5,training_s:5,transact:[3,11],transform:[0,9,10],transfus:0,trivial:3,true_prev:[1,5,8],true_preval:6,truncatedsvd:9,turn:4,tweet:[0,3,10,11],twitter:[6,10],twitter_sentiment_datasets_test:0,twitter_sentiment_datasets_train:0,two:[0,1,3,4,5,8],type:[0,3],typic:[1,4,5,9],uci:6,unabl:0,unadjust:5,unbias:5,uncompress:0,under:1,underestim:5,unfortun:5,unifi:0,uniform_prevalence_sampl:8,uniform_sampl:10,uniform_sampling_index:10,uniform_simplex_sampl:8,uniformli:8,union:[8,11],uniqu:10,unit:0,unix:0,unk:10,unless:11,unlik:[1,4],unus:[8,9,11],up:[3,4,8,9,11],updat:[],url:8,us:[0,1,3,4,5,6,8,9,10,11],user:[0,1,5],utf:10,util:[7,9],v:[3,11],va_iter_per_poch:11,val:[0,10],val_split:[3,4,8,9,11],valid:[0,1,3,4,5,8,9,10,11],valid_loss:[3,9],valid_polici:11,valu:[0,1,3,8,9,10,11],variabl:[1,3,5,8],varianc:[0,5],variant:[5,6,11],varieti:4,variou:[1,5],vector:[
diff --git a/quapy/classification/methods.py b/quapy/classification/methods.py
index b313f57..01f4654 100644
--- a/quapy/classification/methods.py
+++ b/quapy/classification/methods.py
@@ -3,10 +3,18 @@ from sklearn.decomposition import TruncatedSVD
 from sklearn.linear_model import LogisticRegression
 
 
-class PCALR(BaseEstimator):
+class LowRankLogisticRegression(BaseEstimator):
     """
-    An example of a classification method that also generates embedded inputs, as those required for QuaNet.
-    This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).
+    An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)
+    that also generates embedded inputs (i.e., that implements `transform`), as those required for
+    :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating
+    :class:`quapy.method.neural.QuaNet` on array-like, real-valued instances.
+    The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`,
+    while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.
+
+    :param n_components: the number of principal components to retain
+    :param kwargs: parameters for the
+        `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier
     """
 
     def __init__(self, n_components=100, **kwargs):
@@ -14,35 +22,76 @@ class PCALR(BaseEstimator):
         self.learner = LogisticRegression(**kwargs)
 
     def get_params(self):
+        """
+        Get hyper-parameters for this estimator.
+
+        :return: a dictionary with parameter names mapped to their values
+        """
         params = {'n_components': self.n_components}
         params.update(self.learner.get_params())
         return params
 
     def set_params(self, **params):
-        if 'n_components' in params:
-            self.n_components = params['n_components']
-            del params['n_components']
-        self.learner.set_params(**params)
+        """
+        Set the parameters of this estimator.
+
+        :param params: a `**kwargs` dictionary with the estimator parameters for
+            `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__,
+            and optionally also `n_components` for `TruncatedSVD`
+        """
+        params_ = dict(params)
+        if 'n_components' in params_:
+            self.n_components = params_['n_components']
+            del params_['n_components']
+        self.learner.set_params(**params_)
 
     def fit(self, X, y):
-        self.learner.fit(X, y)
+        """
+        Fit the model according to the given training data. The fit consists of
+        fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.
+
+        :param X: array-like of shape `(n_samples, n_features)` with the instances
+        :param y: array-like of shape `(n_samples,)` with the class labels
+        :return: `self`
+        """
         nF = X.shape[1]
         self.pca = None
         if nF > self.n_components:
-            self.pca = TruncatedSVD(self.n_components).fit(X, y)
+            self.pca = TruncatedSVD(self.n_components).fit(X)
+        X = self.transform(X)
+        self.learner.fit(X, y)
         self.classes_ = self.learner.classes_
         return self
-
     def predict(self, X):
-        # X = self.transform(X)
+        """
+        Predicts labels for the instances `X` embedded into the low-rank space.
+
+        :param X: array-like of shape `(n_samples, n_features)` with the instances to classify
+        :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+            instances in `X`
+        """
+        X = self.transform(X)
         return self.learner.predict(X)
 
     def predict_proba(self, X):
-        # X = self.transform(X)
+        """
+        Predicts posterior probabilities for the instances `X` embedded into the low-rank space.
+
+        :param X: array-like of shape `(n_samples, n_features)` with the instances to classify
+        :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+        """
+        X = self.transform(X)
         return self.learner.predict_proba(X)
 
     def transform(self, X):
+        """
+        Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if
+        `n_components` >= `X.shape[1]`.
+
+        :param X: array-like of shape `(n_samples, n_features)` with the instances to embed
+        :return: array-like of shape `(n_samples, n_components)` with the embedded instances
+        """
         if self.pca is None:
             return X
         return self.pca.transform(X)
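To make the new API concrete, a minimal usage sketch of the class documented above (the synthetic data, shapes, and hyper-parameter values are illustrative assumptions, not part of the diff):

import numpy as np
from quapy.classification.methods import LowRankLogisticRegression

# synthetic stand-in data: 200 dense instances with 500 features, binary labels
X = np.random.rand(200, 500)
y = np.random.randint(0, 2, size=200)

cls = LowRankLogisticRegression(n_components=100, C=1.0)  # C is forwarded to LogisticRegression
cls.fit(X, y)                       # fits TruncatedSVD (since 500 > 100), then LR on the low-rank space
posteriors = cls.predict_proba(X)   # array of shape (200, 2) with posterior probabilities
embedded = cls.transform(X)         # low-rank embeddings of shape (200, 100), as QuaNet requires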
diff --git a/quapy/classification/neural.py b/quapy/classification/neural.py
index 7823165..0d576c5 100644
--- a/quapy/classification/neural.py
+++ b/quapy/classification/neural.py
@@ -16,6 +16,22 @@
 from quapy.util import EarlyStop
 
 
 class NeuralClassifierTrainer:
+    """
+    Trains a neural network for text classification.
+
+    :param net: an instance of `TextClassifierNet` implementing the forward pass
+    :param lr: learning rate (default 1e-3)
+    :param weight_decay: weight decay (default 0)
+    :param patience: number of epochs that do not show any improvement in validation
+        to wait before applying early stop (default 10)
+    :param epochs: maximum number of training epochs (default 200)
+    :param batch_size: batch size for training (default 64)
+    :param batch_size_test: batch size for test (default 512)
+    :param padding_length: maximum number of tokens to consider in a document (default 300)
+    :param device: specify 'cpu' (default) or 'cuda' for enabling gpu
+    :param checkpointpath: where to store the parameters of the best model found so far
+        according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat')
+    """
 
     def __init__(self,
                  net: 'TextClassifierNet',
@@ -45,23 +61,36 @@ class NeuralClassifierTrainer:
             'device': torch.device(device)
         }
         self.learner_hyperparams = self.net.get_params()
-        self.checkpointpath = checkpointpath
         self.classes_ = np.asarray([0, 1])
 
         print(f'[NeuralNetwork running on {device}]')
-        os.makedirs(Path(checkpointpath).parent, exist_ok=True)
 
     def reset_net_params(self, vocab_size, n_classes):
+        """Reinitialize the network parameters
+
+        :param vocab_size: the size of the vocabulary
+        :param n_classes: the number of target classes
+        """
         self.net = self.net.__class__(vocab_size, n_classes, **self.learner_hyperparams)
         self.net = self.net.to(self.trainer_hyperparams['device'])
         self.net.xavier_uniform()
 
     def get_params(self):
+        """Get hyper-parameters for this estimator
+
+        :return: a dictionary with parameter names mapped to their values
+        """
         return {**self.net.get_params(), **self.trainer_hyperparams}
 
    def set_params(self, **params):
+        """Set the parameters of this trainer and the learner it is training.
+        In the current version, parameter names for the trainer and learner should
+        be disjoint.
+
+        :param params: a `**kwargs` dictionary with the parameters
+        """
         trainer_hyperparams = self.trainer_hyperparams
         learner_hyperparams = self.net.get_params()
         for key, val in params.items():
@@ -81,6 +110,10 @@ class NeuralClassifierTrainer:
 
     @property
     def device(self):
+        """ Gets the device in which the network is allocated
+
+        :return: device
+        """
         return next(self.net.parameters()).device
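# Example (sketch, not part of the diff; CNNnet is defined later in this same
# file): constructing a trainer with the hyper-parameters documented above.
from quapy.classification.neural import NeuralClassifierTrainer, CNNnet

net = CNNnet(vocabulary_size=5000, n_classes=2)
trainer = NeuralClassifierTrainer(net, lr=1e-3, patience=10, device='cpu')
print(trainer.get_params())  # the net's and the trainer's hyper-parameters, merged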
 
     def _train_epoch(self, data, status, pbar, epoch):
@@ -132,6 +165,14 @@ class NeuralClassifierTrainer:
                   f'macroF1={100 * self.status["va"]["f1"]:.2f}%')
 
     def fit(self, instances, labels, val_split=0.3):
+        """
+        Fits the model according to the given training data.
+
+        :param instances: list of lists of indexed tokens
+        :param labels: array-like of shape `(n_samples,)` with the class labels
+        :param val_split: proportion of training documents to be taken as the validation set (default 0.3)
+        :return: `self`
+        """
         train, val = LabelledCollection(instances, labels).split_stratified(1-val_split)
         opt = self.trainer_hyperparams
         checkpoint = self.checkpointpath
@@ -169,9 +210,22 @@ class NeuralClassifierTrainer:
         return self
 
     def predict(self, instances):
+        """
+        Predicts labels for the instances
+
+        :param instances: list of lists of indexed tokens
+        :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+            instances in `instances`
+        """
         return np.argmax(self.predict_proba(instances), axis=-1)
 
     def predict_proba(self, instances):
+        """
+        Predicts posterior probabilities for the instances
+
+        :param instances: list of lists of indexed tokens
+        :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+        """
         self.net.eval()
         opt = self.trainer_hyperparams
         with torch.no_grad():
@@ -182,6 +236,13 @@ class NeuralClassifierTrainer:
         return np.concatenate(positive_probs)
 
     def transform(self, instances):
+        """
+        Returns the embeddings of the instances
+
+        :param instances: list of lists of indexed tokens
+        :return: array-like of shape `(n_samples, embed_size)` with the embedded instances,
+            where `embed_size` is defined by the classification network
+        """
         self.net.eval()
         embeddings = []
         opt = self.trainer_hyperparams
@@ -193,6 +254,12 @@ class NeuralClassifierTrainer:
 
 
 class TorchDataset(torch.utils.data.Dataset):
+    """
+    Wraps labelled instances as a Torch :class:`torch.utils.data.Dataset` object, convertible
+    into a :class:`torch.utils.data.DataLoader` via :meth:`asDataloader`
+
+    :param instances: list of lists of indexed tokens
+    :param labels: array-like of shape `(n_samples,)` with the class labels
+    """
 
     def __init__(self, instances, labels=None):
         self.instances = instances
@@ -205,6 +272,18 @@ class TorchDataset(torch.utils.data.Dataset):
         return {'doc': self.instances[index], 'label': self.labels[index] if self.labels is not None else None}
 
     def asDataloader(self, batch_size, shuffle, pad_length, device):
+        """
+        Converts the labelled collection into a Torch DataLoader with dynamic padding for
+        the batch
+
+        :param batch_size: batch size
+        :param shuffle: whether or not to shuffle instances
+        :param pad_length: the maximum length for the list of tokens (dynamic padding is
+            applied, meaning that if the longest document in the batch is shorter than
+            `pad_length`, then the batch is padded up to its length, and not to `pad_length`)
+        :param device: whether to allocate tensors in cpu or in cuda
+        :return: a :class:`torch.utils.data.DataLoader` object
+        """
         def collate(batch):
             data = [torch.LongTensor(item['doc'][:pad_length]) for item in batch]
             data = pad_sequence(data, batch_first=True, padding_value=qp.environ['PAD_INDEX']).to(device)
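# Example (sketch, not part of the diff; assumes qp.environ['PAD_INDEX'] has
# been set, e.g., to 0): the dynamic padding described in asDataloader pads
# each batch only up to its longest document, not up to pad_length.
import quapy as qp
from quapy.classification.neural import TorchDataset

qp.environ['PAD_INDEX'] = 0
docs = [[12, 7, 431], [8, 99], [5]]   # three documents of indexed tokens
loader = TorchDataset(docs, labels=[1, 0, 1]).asDataloader(
    batch_size=2, shuffle=False, pad_length=300, device='cpu')
batch = next(iter(loader))  # token tensors in this batch are padded to length 3, not 300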
@@ -220,37 +299,97 @@ class TorchDataset(torch.utils.data.Dataset):
 
 
 class TextClassifierNet(torch.nn.Module, metaclass=ABCMeta):
+    """
+    Abstract text classifier (`torch.nn.Module`)
+    """
 
     @abstractmethod
-    def document_embedding(self, x): ...
+    def document_embedding(self, x):
+        """Embeds documents (i.e., performs the forward pass up to the
+        next-to-last layer).
+
+        :param x: a batch of instances, typically generated by a torch's `DataLoader`
+            instance (see :class:`quapy.classification.neural.TorchDataset`)
+        :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+            `n_samples` is the number of documents, and `n_dimensions` is the
+            dimensionality of the embedding
+        """
+        ...
 
     def forward(self, x):
+        """Performs the forward pass.
+
+        :param x: a batch of instances, typically generated by a torch's `DataLoader`
+            instance (see :class:`quapy.classification.neural.TorchDataset`)
+        :return: a tensor of shape `(n_instances, n_classes)` with the decision scores
+            for each of the instances and classes
+        """
         doc_embedded = self.document_embedding(x)
         return self.output(doc_embedded)
 
     def dimensions(self):
+        """Gets the number of dimensions of the embedding space
+
+        :return: integer
+        """
         return self.dim
 
     def predict_proba(self, x):
+        """
+        Predicts posterior probabilities for the instances in `x`
+
+        :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`,
+            where `n_instances` is the number of instances in the batch, and `pad_length`
+            is the length of the pad in the batch
+        :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+        """
         logits = self(x)
         return torch.softmax(logits, dim=1).detach().cpu().numpy()
 
     def xavier_uniform(self):
+        """
+        Performs Xavier initialization of the network parameters
+        """
         for p in self.parameters():
             if p.dim() > 1 and p.requires_grad:
                 torch.nn.init.xavier_uniform_(p)
 
     @abstractmethod
-    def get_params(self): ...
+    def get_params(self):
+        """
+        Get hyper-parameters for this estimator
+
+        :return: a dictionary with parameter names mapped to their values
+        """
+        ...
 
     @property
-    def vocabulary_size(self): ...
+    def vocabulary_size(self):
+        """
+        Return the size of the vocabulary
+
+        :return: integer
+        """
+        ...
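# Example (sketch, not part of the diff; `BowNet` is a hypothetical toy class):
# the contract a subclass must honour -- implement document_embedding and
# get_params, expose vocabulary_size, and set self.dim and self.output;
# forward and predict_proba are inherited.
import torch
from quapy.classification.neural import TextClassifierNet

class BowNet(TextClassifierNet):
    def __init__(self, vocabulary_size, n_classes, embedding_size=50):
        super().__init__()
        self.vocabulary_size_ = vocabulary_size
        self.emb = torch.nn.Embedding(vocabulary_size, embedding_size)
        self.dim = embedding_size                           # document embedding size
        self.output = torch.nn.Linear(self.dim, n_classes)  # used by the inherited forward

    def document_embedding(self, x):
        return self.emb(x).mean(dim=1)  # mean of word embeddings as the document embedding

    def get_params(self):
        # keys must be valid __init__ kwargs so that reset_net_params can rebuild the net
        return {'embedding_size': self.dim}

    @property
    def vocabulary_size(self):
        return self.vocabulary_size_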
 
 
 class LSTMnet(TextClassifierNet):
+    """
+    An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
+    Long Short Term Memory networks.
+
+    :param vocabulary_size: the size of the vocabulary
+    :param n_classes: number of target classes
+    :param embedding_size: the dimensionality of the word embeddings space (default 100)
+    :param hidden_size: the dimensionality of the hidden space (default 256)
+    :param repr_size: the dimensionality of the document embeddings space (default 100)
+    :param lstm_class_nlayers: number of LSTM layers (default 1)
+    :param drop_p: drop probability for dropout (default 0.5)
+    """
 
     def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100,
                  lstm_class_nlayers=1, drop_p=0.5):
+
         super().__init__()
         self.vocabulary_size_ = vocabulary_size
         self.n_classes = n_classes
@@ -270,7 +409,7 @@ class LSTMnet(TextClassifierNet):
         self.doc_embedder = torch.nn.Linear(hidden_size, self.dim)
         self.output = torch.nn.Linear(self.dim, n_classes)
 
-    def init_hidden(self, set_size):
+    def __init_hidden(self, set_size):
         opt = self.hyperparams
         var_hidden = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
         var_cell = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
@@ -279,21 +418,55 @@ class LSTMnet(TextClassifierNet):
         return var_hidden, var_cell
 
     def document_embedding(self, x):
+        """Embeds documents (i.e., performs the forward pass up to the
+        next-to-last layer).
+
+        :param x: a batch of instances, typically generated by a torch's `DataLoader`
+            instance (see :class:`quapy.classification.neural.TorchDataset`)
+        :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+            `n_samples` is the number of documents, and `n_dimensions` is the
+            dimensionality of the embedding
+        """
         embedded = self.word_embedding(x)
-        rnn_output, rnn_hidden = self.lstm(embedded, self.init_hidden(x.size()[0]))
+        rnn_output, rnn_hidden = self.lstm(embedded, self.__init_hidden(x.size()[0]))
         abstracted = self.dropout(F.relu(rnn_hidden[0][-1]))
         abstracted = self.doc_embedder(abstracted)
         return abstracted
 
     def get_params(self):
+        """
+        Get hyper-parameters for this estimator
+
+        :return: a dictionary with parameter names mapped to their values
+        """
         return self.hyperparams
 
     @property
     def vocabulary_size(self):
+        """
+        Return the size of the vocabulary
+
+        :return: integer
+        """
         return self.vocabulary_size_
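# Example (sketch, not part of the diff; assumes repr_size determines the value
# reported by dimensions()): LSTMnet is a drop-in TextClassifierNet.
from quapy.classification.neural import LSTMnet

net = LSTMnet(vocabulary_size=5000, n_classes=2, hidden_size=256, repr_size=100)
print(net.dimensions())     # 100: the document embedding size
print(net.vocabulary_size)  # 5000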
 
 
 class CNNnet(TextClassifierNet):
+    """
+    An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
+    Convolutional Neural Networks.
+
+    :param vocabulary_size: the size of the vocabulary
+    :param n_classes: number of target classes
+    :param embedding_size: the dimensionality of the word embeddings space (default 100)
+    :param hidden_size: the dimensionality of the hidden space (default 256)
+    :param repr_size: the dimensionality of the document embeddings space (default 100)
+    :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of
+        consecutive tokens that each kernel covers
+    :param stride: convolutional stride (default 1)
+    :param padding: convolutional pad (default 0)
+    :param drop_p: drop probability for dropout (default 0.5)
+    """
 
     def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100,
                  kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5):
@@ -320,19 +493,28 @@ class CNNnet(TextClassifierNet):
         self.doc_embedder = torch.nn.Linear(len(kernel_heights) * hidden_size, self.dim)
         self.output = nn.Linear(self.dim, n_classes)
 
-    def conv_block(self, input, conv_layer):
+    def __conv_block(self, input, conv_layer):
         conv_out = conv_layer(input)  # conv_out.size() = (batch_size, out_channels, dim, 1)
         activation = F.relu(conv_out.squeeze(3))  # activation.size() = (batch_size, out_channels, dim1)
         max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2)  # maxpool_out.size() = (batch_size, out_channels)
         return max_out
 
     def document_embedding(self, input):
+        """Embeds documents (i.e., performs the forward pass up to the
+        next-to-last layer).
+
+        :param input: a batch of instances, typically generated by a torch's `DataLoader`
+            instance (see :class:`quapy.classification.neural.TorchDataset`)
+        :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+            `n_samples` is the number of documents, and `n_dimensions` is the
+            dimensionality of the embedding
+        """
         input = self.word_embedding(input)
         input = input.unsqueeze(1)  # input.size() = (batch_size, 1, num_seq, embedding_length)
 
-        max_out1 = self.conv_block(input, self.conv1)
-        max_out2 = self.conv_block(input, self.conv2)
-        max_out3 = self.conv_block(input, self.conv3)
+        max_out1 = self.__conv_block(input, self.conv1)
+        max_out2 = self.__conv_block(input, self.conv2)
+        max_out3 = self.__conv_block(input, self.conv3)
 
         all_out = torch.cat((max_out1, max_out2, max_out3), 1)  # all_out.size() = (batch_size, num_kernels*out_channels)
         abstracted = self.dropout(F.relu(all_out))  # (batch_size, num_kernels*out_channels)
@@ -340,10 +522,20 @@ class CNNnet(TextClassifierNet):
         return abstracted
 
     def get_params(self):
+        """
+        Get hyper-parameters for this estimator
+
+        :return: a dictionary with parameter names mapped to their values
+        """
         return self.hyperparams
 
     @property
     def vocabulary_size(self):
+        """
+        Return the size of the vocabulary
+
+        :return: integer
+        """
         return self.vocabulary_size_
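# Example (sketch, not part of the diff): each value in kernel_heights adds a
# convolution spanning that many consecutive token embeddings; the max-pooled
# outputs are concatenated and projected down to repr_size.
from quapy.classification.neural import CNNnet

net = CNNnet(vocabulary_size=5000, n_classes=2, kernel_heights=[3, 5, 7], repr_size=100)
print(net.dimensions())  # 100, regardless of how many kernels are used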
diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py
index b5a4e85..2f6ad90 100644
--- a/quapy/classification/svmperf.py
+++ b/quapy/classification/svmperf.py
@@ -1,17 +1,29 @@
 import random
 import subprocess
-import tempfile
 from os import remove, makedirs
 from os.path import join, exists
 from subprocess import PIPE, STDOUT
-import shutil
-
 import numpy as np
 from sklearn.base import BaseEstimator, ClassifierMixin
 from sklearn.datasets import dump_svmlight_file
 
 
 class SVMperf(BaseEstimator, ClassifierMixin):
+    """A wrapper for the `SVM-perf package `__ by Thorsten Joachims.
+    When using losses for quantification, the source code has to be patched. See
+    the `installation documentation `__
+    for further details.
+
+    References:
+
+        * `Esuli et al. 2015 `__
+        * `Barranquero et al. 2015 `__
+
+    :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`
+    :param C: trade-off between training error and margin (default 0.01)
+    :param verbose: set to True to print svm-perf std outputs
+    :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
+    """
 
     # losses with their respective codes in svm_perf implementation
     valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
@@ -24,10 +36,22 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         self.loss = loss
 
     def set_params(self, **parameters):
+        """
+        Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported.
+
+        :param parameters: a `**kwargs` dictionary `{'C': <float>}`
+        """
         assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
         self.C = parameters['C']
 
     def fit(self, X, y):
+        """
+        Trains the SVM for the multivariate performance loss
+
+        :param X: training instances
+        :param y: a binary vector of labels
+        :return: `self`
+        """
         assert self.loss in SVMperf.valid_losses, \
             f'unsupported loss {self.loss}, valid ones are {list(SVMperf.valid_losses.keys())}'
@@ -68,11 +92,24 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         return self
 
     def predict(self, X):
+        """
+        Predicts labels for the instances `X`
+
+        :param X: array-like of shape `(n_samples, n_features)` instances to classify
+        :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+            instances in `X`
+        """
         confidence_scores = self.decision_function(X)
         predictions = (confidence_scores > 0) * 1
         return predictions
 
     def decision_function(self, X, y=None):
+        """
+        Evaluate the decision function for the samples in `X`.
+
+        :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify
+        :param y: unused
+        :return: array-like of shape `(n_samples,)` containing the decision scores of the instances
+        """
         assert hasattr(self, 'tmpdir'), 'predict called before fit'
         assert self.tmpdir is not None, 'model directory corrupted'
         assert exists(self.model), 'model not found'
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 35f87b9..f4ff185 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -91,8 +91,8 @@ class GridSearchQ(BaseQuantifier):
         if self.protocol=='npp' and (self.eval_budget is None or self.eval_budget <= 0):
             raise ValueError(f'when protocol="npp" the parameter eval_budget should be '
                              f'indicated (and should be >0).')
-        if self.n_prevpoints != 1:
-            print('[warning] n_prevpoints has been set and will be ignored for the selected protocol')
+        if self.n_repetitions != 1:
+            print('[warning] n_repetitions has been set and will be ignored for the selected protocol')
 
     def _sout(self, msg):
         if self.verbose:
@@ -165,7 +165,6 @@ class GridSearchQ(BaseQuantifier):
         params_values = list(self.param_grid.values())
 
         model = self.model
-        n_jobs = self.n_jobs
 
         if self.timeout > 0:
             def handler(signum, frame):
@@ -174,7 +173,6 @@ class GridSearchQ(BaseQuantifier):
 
             signal.signal(signal.SIGALRM, handler)
 
-        self._sout(f'starting optimization with n_jobs={n_jobs}')
         self.param_scores_ = {}
         self.best_score_ = None
         some_timeouts = False
diff --git a/quapy/util.py b/quapy/util.py
index 96c5835..9eafdfa 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -83,6 +83,7 @@ def download_file_if_not_exists(url, archive_path):
 
 
 def create_if_not_exist(path):
     os.makedirs(path, exist_ok=True)
+    return path
 
 
 def get_quapy_home():
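# Example (sketch, not part of the diff): with the added `return path`,
# directory creation can now be chained directly into an assignment.
import os
from quapy.util import create_if_not_exist

out_dir = create_if_not_exist(os.path.join('checkpoints', 'run1'))  # creates the dir if needed, returns its path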