From 3eb760901f203a6c00d149799b768dba227e4f99 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Fri, 12 Nov 2021 14:30:02 +0100
Subject: [PATCH] doc update, official baselines for T1A and T1B refactored
---
LeQua2022/baselinesSVD_T1A.py | 76 ----
LeQua2022/baselines_T1.py | 91 ++++
LeQua2022/baselines_T1A.py | 71 ---
LeQua2022/baselines_T1Amodsel.py | 81 ----
LeQua2022/baselines_T1B.py | 55 ---
LeQua2022/constants.py | 7 +
LeQua2022/data.py | 25 +-
docs/build/html/_sources/index.rst.txt | 2 +-
docs/build/html/genindex.html | 28 +-
docs/build/html/index.html | 2 +-
docs/build/html/objects.inv | Bin 2710 -> 2712 bytes
docs/build/html/quapy.classification.html | 507 ++++++++++++++++++----
docs/build/html/searchindex.js | 2 +-
quapy/classification/methods.py | 73 +++-
quapy/classification/neural.py | 214 ++++++++-
quapy/classification/svmperf.py | 43 +-
quapy/model_selection.py | 6 +-
quapy/util.py | 1 +
18 files changed, 846 insertions(+), 438 deletions(-)
delete mode 100644 LeQua2022/baselinesSVD_T1A.py
create mode 100644 LeQua2022/baselines_T1.py
delete mode 100644 LeQua2022/baselines_T1A.py
delete mode 100644 LeQua2022/baselines_T1Amodsel.py
delete mode 100644 LeQua2022/baselines_T1B.py
diff --git a/LeQua2022/baselinesSVD_T1A.py b/LeQua2022/baselinesSVD_T1A.py
deleted file mode 100644
index 576a7ee..0000000
--- a/LeQua2022/baselinesSVD_T1A.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-from sklearn.decomposition import TruncatedSVD
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-svd = TruncatedSVD(n_components=300)
-train.instances = svd.fit_transform(train.instances)
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
-
- # classifier = CalibratedClassifierCV(LogisticRegression())
- classifier = LogisticRegression()
- model = quantifier(classifier).fit(train)
- quantifier_name = model.__class__.__name__
-
- predictions = ResultSubmission(categories=['negative', 'positive'])
- for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
- desc=quantifier_name, total=len(true_prevalence)):
- sample = svd.transform(sample)
- predictions.add(samplename, model.quantify(sample))
-
- predictions.dump(os.path.join(predictions_path, quantifier_name + '.svd.csv'))
- pickle.dump(model, open(os.path.join(models_path, quantifier_name+'.svd.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
-
- mae, mrae = evaluate_submission(true_prevalence, predictions)
- print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1.py b/LeQua2022/baselines_T1.py
new file mode 100644
index 0000000..3a52361
--- /dev/null
+++ b/LeQua2022/baselines_T1.py
@@ -0,0 +1,91 @@
+import argparse
+import pickle
+from sklearn.linear_model import LogisticRegression as LR
+from quapy.method.aggregative import *
+import quapy.functional as F
+from data import *
+import os
+import constants
+
+
+# LeQua official baselines for tasks T1A (Binary/Vector) and T1B (Multiclass/Vector)
+# ==================================================================================
+
+def baselines(task=None):
+    """Yield (quantifier, name) pairs; `task` defaults to the CLI-parsed module-level `args.task`."""
+    binary = (args.task if task is None else task) == 'T1A'
+    yield CC(LR(n_jobs=-1)), "CC"
+    yield ACC(LR(n_jobs=-1)), "ACC"
+    yield PCC(LR(n_jobs=-1)), "PCC"
+    yield PACC(LR(n_jobs=-1)), "PACC"
+    yield EMQ(CalibratedClassifierCV(LR(), n_jobs=-1)), "SLD"
+    yield (HDy(LR(n_jobs=-1)) if binary else OneVsAll(HDy(LR()), n_jobs=-1)), "HDy"  # OneVsAll unless T1A
+
+
+def main(args):
+    """Run model selection for every baseline on the dev samples and pickle each best model."""
+    models_path = qp.util.create_if_not_exist(os.path.join(args.modeldir, args.task))
+    path_dev_vectors = os.path.join(args.datadir, 'dev_vectors')
+    path_dev_prevs = os.path.join(args.datadir, 'dev_prevalences.csv')
+    path_train = os.path.join(args.datadir, 'training_vectors.txt')
+
+    qp.environ['SAMPLE_SIZE'] = constants.SAMPLE_SIZE[args.task]
+    train = LabelledCollection.load(path_train, load_binary_vectors)
+    nF = train.instances.shape[1]
+
+    print(f'number of classes: {len(train.classes_)}')
+    print(f'number of training documents: {len(train)}')
+    print(f'training prevalence: {F.strprev(train.prevalence())}')
+    print(f'training matrix shape: {train.instances.shape}')
+
+    param_grid = {
+        'C': np.logspace(-3,3,7),
+        'class_weight': ['balanced', None]
+    }
+
+    def gen_samples():
+        return gen_load_samples_T1(path_dev_vectors, nF, ground_truth_path=path_dev_prevs, return_id=False)
+
+    for quantifier, q_name in baselines(args.task):  # pass the task explicitly instead of relying on a global
+        print(f'{q_name}: Model selection')
+        quantifier = qp.model_selection.GridSearchQ(
+            quantifier,
+            param_grid,
+            sample_size=None,
+            protocol='gen',
+            error=qp.error.mae,
+            refit=False,
+            verbose=True
+        ).fit(train, gen_samples)
+
+        print(f'{q_name} got MAE={quantifier.best_score_:.3f} (hyper-params: {quantifier.best_params_})')
+        model_path = os.path.join(models_path, q_name+'.pkl')
+        print(f'saving model in {model_path}')
+        with open(model_path, 'wb') as fout:  # context manager closes the handle even if pickling fails
+            pickle.dump(quantifier.best_model(), fout, protocol=pickle.HIGHEST_PROTOCOL)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='LeQua2022 Task T1A/T1B baselines')
+    parser.add_argument('task', metavar='TASK', type=str, choices=['T1A', 'T1B'],
+                        help='Task name (T1A, T1B)')
+    parser.add_argument('datadir', metavar='DATA-PATH', type=str,
+                        help='Path of the directory containing "dev_prevalences.csv", "training_vectors.txt", and '
+                             'the directory "dev_vectors"')
+    parser.add_argument('modeldir', metavar='MODEL-PATH', type=str,
+                        help='Path where to save the models. '
+                             'A subdirectory named after the task (T1A or T1B) will be automatically created.')
+    args = parser.parse_args()
+    # fail early, with an explicit message, when the expected data layout is missing
+    if not os.path.exists(args.datadir):
+        raise FileNotFoundError(f'path {args.datadir} does not exist')
+    if not os.path.isdir(args.datadir):
+        raise ValueError(f'path {args.datadir} is not a valid directory')
+    if not os.path.exists(os.path.join(args.datadir, "dev_prevalences.csv")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "dev_prevalences.csv" file')
+    if not os.path.exists(os.path.join(args.datadir, "training_vectors.txt")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "training_vectors.txt" file')
+    if not os.path.exists(os.path.join(args.datadir, "dev_vectors")):
+        raise FileNotFoundError(f'path {args.datadir} does not contain "dev_vectors" folder')
+
+    main(args)
diff --git a/LeQua2022/baselines_T1A.py b/LeQua2022/baselines_T1A.py
deleted file mode 100644
index 48d42d4..0000000
--- a/LeQua2022/baselines_T1A.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-for quantifier in [CC, ACC, PCC, PACC, EMQ, HDy]:
-
- # classifier = CalibratedClassifierCV(LogisticRegression(C=1))
- classifier = LogisticRegression(C=1)
- model = quantifier(classifier).fit(train)
- quantifier_name = model.__class__.__name__
-
- predictions = ResultSubmission(categories=['negative', 'positive'])
- for samplename, sample in tqdm(gen_load_samples_T1(T1A_devvectors_path, nF),
- desc=quantifier_name, total=len(true_prevalence)):
- predictions.add(samplename, model.quantify(sample))
-
- predictions.dump(os.path.join(predictions_path, quantifier_name + '.csv'))
- pickle.dump(model, open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
-
- mae, mrae = evaluate_submission(true_prevalence, predictions)
- print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1Amodsel.py b/LeQua2022/baselines_T1Amodsel.py
deleted file mode 100644
index 12d658a..0000000
--- a/LeQua2022/baselines_T1Amodsel.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import pickle
-from sklearn.linear_model import LogisticRegression
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-
-# LeQua official baselines for task T1A (Binary/Vector)
-# =====================================================
-
-predictions_path = os.path.join('predictions', 'T1A')
-os.makedirs(predictions_path, exist_ok=True)
-
-models_path = os.path.join('models', 'T1A')
-os.makedirs(models_path, exist_ok=True)
-
-pathT1A = './data/T1A/public'
-T1A_devvectors_path = os.path.join(pathT1A, 'dev_vectors')
-T1A_devprevalence_path = os.path.join(pathT1A, 'dev_prevalences.csv')
-T1A_trainpath = os.path.join(pathT1A, 'training_vectors.txt')
-
-train = LabelledCollection.load(T1A_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1A_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1A_devprevalence_path)
-
-param_grid = {
- 'C': np.logspace(-3,3,7),
- 'class_weight': ['balanced', None]
-}
-
-
-def gen_samples():
- return gen_load_samples_T1(T1A_devvectors_path, nF, ground_truth_path=T1A_devprevalence_path, return_id=False)
-
-
-for quantifier in [EMQ]: # [CC, ACC, PCC, PACC, EMQ, HDy]:
- if quantifier == EMQ:
- classifier = CalibratedClassifierCV(LogisticRegression(), n_jobs=-1)
- else:
- classifier = LogisticRegression()
- model = quantifier(classifier)
- print(f'{model.__class__.__name__}: Model selection')
- model = qp.model_selection.GridSearchQ(
- model,
- param_grid,
- sample_size=None,
- protocol='gen',
- error=qp.error.mae,
- refit=False,
- verbose=True
- ).fit(train, gen_samples)
-
- quantifier_name = model.best_model().__class__.__name__
- print(f'{quantifier_name} mae={model.best_score_:.3f} (params: {model.best_params_})')
-
- pickle.dump(model.best_model(),
- open(os.path.join(models_path, quantifier_name+'.pkl'), 'wb'),
- protocol=pickle.HIGHEST_PROTOCOL)
-
-
-"""
-validation
-CC 0.1862 1.9587
-ACC 0.0394 0.2669
-PCC 0.1789 2.1383
-PACC 0.0354 0.1587
-EMQ 0.0224 0.0960
-HDy 0.0467 0.2121
-"""
-
-
diff --git a/LeQua2022/baselines_T1B.py b/LeQua2022/baselines_T1B.py
deleted file mode 100644
index 1344bbc..0000000
--- a/LeQua2022/baselines_T1B.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import pickle
-
-import numpy as np
-from sklearn.linear_model import LogisticRegression
-from tqdm import tqdm
-import pandas as pd
-
-import quapy as qp
-from quapy.data import LabelledCollection
-from quapy.method.aggregative import *
-import quapy.functional as F
-from data import *
-import os
-import constants
-
-predictions_path = os.path.join('predictions', 'T1B') # multiclass - vector
-os.makedirs(predictions_path, exist_ok=True)
-
-pathT1B = './data/T1B/public'
-T1B_devvectors_path = os.path.join(pathT1B, 'dev_vectors')
-T1B_devprevalence_path = os.path.join(pathT1B, 'dev_prevalences.csv')
-T1B_trainpath = os.path.join(pathT1B, 'training_vectors.txt')
-T1B_catmap = os.path.join(pathT1B, 'training_vectors_label_map.txt')
-
-train = LabelledCollection.load(T1B_trainpath, load_binary_vectors)
-nF = train.instances.shape[1]
-
-qp.environ['SAMPLE_SIZE'] = constants.T1B_SAMPLE_SIZE
-
-print(f'number of classes: {len(train.classes_)}')
-print(f'number of training documents: {len(train)}')
-print(f'training prevalence: {F.strprev(train.prevalence())}')
-print(f'training matrix shape: {train.instances.shape}')
-
-true_prevalence = ResultSubmission.load(T1B_devprevalence_path)
-
-cat2code, categories = load_category_map(T1B_catmap)
-
-for quantifier in [PACC]: # [CC, ACC, PCC, PACC, EMQ]:
-
- classifier = CalibratedClassifierCV(LogisticRegression())
- model = quantifier(classifier).fit(train)
- quantifier_name = model.__class__.__name__
-
- predictions = ResultSubmission(categories=categories)
- for samplename, sample in tqdm(gen_load_samples_T1(T1B_devvectors_path, nF),
- desc=quantifier_name, total=len(true_prevalence)):
- predictions.add(samplename, model.quantify(sample))
-
- predictions.dump(os.path.join(predictions_path, quantifier_name + '.csv'))
- mae, mrae = evaluate_submission(true_prevalence, predictions)
- print(f'{quantifier_name} mae={mae:.3f} mrae={mrae:.3f}')
-
-
-
diff --git a/LeQua2022/constants.py b/LeQua2022/constants.py
index 11a78ce..7036eff 100644
--- a/LeQua2022/constants.py
+++ b/LeQua2022/constants.py
@@ -9,4 +9,11 @@ T1B_SAMPLE_SIZE = 1000
T2A_SAMPLE_SIZE = 250
T2B_SAMPLE_SIZE = 1000
+SAMPLE_SIZE={  # sample size of each LeQua2022 task, indexed by task name
+    'T1A': T1A_SAMPLE_SIZE,
+    'T1B': T1B_SAMPLE_SIZE,
+    'T2A': T2A_SAMPLE_SIZE,
+    'T2B': T2B_SAMPLE_SIZE  # was a duplicated 'T2A' key, which silently overwrote the T2A entry
+}
+
ERROR_TOL = 1E-3
diff --git a/LeQua2022/data.py b/LeQua2022/data.py
index bcea49f..e4a1095 100644
--- a/LeQua2022/data.py
+++ b/LeQua2022/data.py
@@ -34,27 +34,23 @@ def load_category_map(path):
def load_binary_vectors(path, nF=None):
- return sklearn.datasets.load_svmlight_file(path, n_features=nF)
+ X, y = sklearn.datasets.load_svmlight_file(path, n_features=nF)
+ y = y.astype(int)
+ return X, y
def __gen_load_samples_with_groudtruth(path_dir:str, return_id:bool, ground_truth_path:str, load_fn, **load_kwargs):
true_prevs = ResultSubmission.load(ground_truth_path)
for id, prevalence in true_prevs.iterrows():
sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
- if return_id:
- yield id, sample, prevalence
- else:
- yield sample, prevalence
+ yield (id, sample, prevalence) if return_id else (sample, prevalence)
def __gen_load_samples_without_groudtruth(path_dir:str, return_id:bool, load_fn, **load_kwargs):
nsamples = len(glob(os.path.join(path_dir, '*.txt')))
for id in range(nsamples):
sample, _ = load_fn(os.path.join(path_dir, f'{id}.txt'), **load_kwargs)
- if return_id:
- yield id, sample
- else:
- yield sample
+ yield (id, sample) if return_id else sample
def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
@@ -68,6 +64,17 @@ def gen_load_samples_T1(path_dir:str, nF:int, ground_truth_path:str = None, retu
yield r
+def genSVD_load_samples_T1(load_fn, path_dir:str, nF:int, ground_truth_path:str = None, return_id=True):
+    """Yield the samples found in `path_dir`, reading each file via `load_fn(path, nF=nF)`."""
+    if ground_truth_path is not None:
+        # items are (id, sample, prevalence), or (sample, prevalence) when return_id is False
+        samples = __gen_load_samples_with_groudtruth(path_dir, return_id, ground_truth_path, load_fn, nF=nF)
+    else:
+        # items are (id, sample), or just the sample when return_id is False
+        samples = __gen_load_samples_without_groudtruth(path_dir, return_id, load_fn, nF=nF)
+    yield from samples
+
+
def gen_load_samples_T2A(path_dir:str, ground_truth_path:str = None):
# for ... : yield
pass
diff --git a/docs/build/html/_sources/index.rst.txt b/docs/build/html/_sources/index.rst.txt
index 18a0623..608daab 100644
--- a/docs/build/html/_sources/index.rst.txt
+++ b/docs/build/html/_sources/index.rst.txt
@@ -78,7 +78,7 @@ Features
Methods
Model Selection
Plotting
- API Developer documentation
+ API Developers documentation
diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html
index d1eb190..5d2c96f 100644
--- a/docs/build/html/genindex.html
+++ b/docs/build/html/genindex.html
@@ -230,8 +230,6 @@
compute_table() (quapy.method.aggregative.ThresholdOptimization method)
compute_tpr() (quapy.method.aggregative.ThresholdOptimization method)
-
- conv_block() (quapy.classification.neural.CNNnet method)
counts() (quapy.data.base.LabelledCollection method)
@@ -337,7 +335,7 @@
fetch_UCILabelledCollection() (in module quapy.data.datasets)
- fit() (quapy.classification.methods.PCALR method)
+ fit() (quapy.classification.methods.LowRankLogisticRegression method)
|
- |
+ |
@@ -673,8 +669,6 @@
PACC (class in quapy.method.aggregative)
parallel() (in module quapy.util)
-
- PCALR (class in quapy.classification.methods)
PCC (class in quapy.method.aggregative)
@@ -686,7 +680,7 @@
(quapy.method.aggregative.OneVsAll method)
- predict() (quapy.classification.methods.PCALR method)
+ predict() (quapy.classification.methods.LowRankLogisticRegression method)
- predict_proba() (quapy.classification.methods.PCALR method)
+ predict_proba() (quapy.classification.methods.LowRankLogisticRegression method)
- (quapy.classification.neural.NeuralClassifierTrainer method)
@@ -952,7 +946,7 @@
- se() (in module quapy.error)
- - set_params() (quapy.classification.methods.PCALR method)
+
- set_params() (quapy.classification.methods.LowRankLogisticRegression method)
- (quapy.classification.neural.NeuralClassifierTrainer method)
@@ -1032,7 +1026,7 @@
- training_helper() (in module quapy.method.aggregative)
- - transform() (quapy.classification.methods.PCALR method)
+
- transform() (quapy.classification.methods.LowRankLogisticRegression method)
-- API Developer documentation
+- API Developers documentation
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index 76b8d3168f70e619d83c82c2d2a23c3d1d79eb11..318d51756e1a0bc26664fb0035d8a75d56e9414f 100644
GIT binary patch
delta 2612
zcmV-43d{AD6_^!}e}6Vtvv#H`m%TZ*cGnfjqZv8Q=7fSsNJ2~!TmZ7IJ?(4c_3|WX
zfIlKdh$4PChndlk=&$k9=x%@@e?Q>jI4h1nWh^>TCn$TIDcKR9Tk$s-+#!C6DU{KTFcBn4XAPF
zM_Vd&TXeyxlA
z7k;sRUIy#q3V$|TC)Wys_i_!Ms+*tmKljx=!k;;&IpJZt{Q^8n$Qz9(Z$pTybYA|wI#7HsMS~
zzA6}jcg&N?_`zp>U&R!&v9D#Sz|z$
z854bKEZ#Ec)@`6nw4z1pmt{@$X!{urB6LiZFF@Nv9yd!@|Hd~80>tKXWS|&%jtv(%
z&dF~$r3p%z5Ja=Fl$>r3l8nSBV44PNQkO5-kiNA6%XSotF3z-*^nh5jE>bGj5F(}W
z3MTJ7j(@YEIXEn%uNnu?l_vsuAYOmIe{pVr@+i*2IjXAfQF|e=6wWmm6EucN3%ns|
zN|Jj9e>Wb+#-{p&=(l5#nnai@`-~}0k-!<)Y`#M~0@)pwj9YiwNCh-c$VXT;&eO!e
zirFD|uowf$Y2p|-PIJh>@tIC09F{7Nn8bnHqhPClziJ+F!;WmpmpSxqe(C0oQvk>b+m
z?R7_6BI7;!=lx?VNAkg#BcwAC<}@Ym!iyo_sg&hH$bkXtamyxvBLaUMt0w}LmD&w%
zM}IzgR2KPMZ_k+aILB3_2a0f#pm)ZtX89GQd41$TLqi)=u3YO$euXzh9Symv8@Xw$
zqX9>-K0U+)#VkE!x$q%DD997cld3(g7)d_lXSt=x){eqSvmatNAxptBFSV|FC9Fe#
zEZ*FO1uW-a^`>$Ah9BSjb7LL<`NeK;-kP_=z}DbKuEgNryO8HBnZFn@?z
zj7v*Y^^;hN!BRR%x6~{3$yiSPYaos9-1@%pIE~rtP9o8-u$+OcgYXBc@|STzqa}}%
z{~UxQ34%Mh>>R4Ip%@LUy%e^9si%Os!W4|=QXpKwuzb+WrD8dxt2l+M2zB>gY!HX6
z-KdylD$|nnGhGW(i=w0CK*lm2qJJM`ZfHs+B_1poC&=KeJ4Z;aAeK_~zJ|Z8G&>{|
z#32c^!3=JToRzH>H~k4(-ncMNtDAVY<7B5poPh?YE>G^&M!orQ;&|-o_o|+>>U`$c%762kHctY=
zqk=)BIAy%=Gn{*Bgw`tuF2d?+T{k{>2&t`#n8%45HTbC9MNw#bYt=Q3XBk9SPBh5K
zw(x_`u0ycH8A^-n>o|?j-E;;Qr3CZpBy8wj9)9BJ2@gEigHLa-oFG(6;2+5YN8I^h
z>Cr799j0FtD1;AC9M2A(FMq%K^vLon{PENu%~$Kk#g`}Fu?5od?l<)KJpUE|6=t9T
z|I_?7&TQQXjCd88Fpg=T>bPw#W4@VCp1?e!O
zsX(5i+1s7@w`bRY#d~zka_uD?u|g^%B1rBz5qp*(a;9Y!PYq~BefXS8i#o<5-AgvqNL+yu+{6Fb~O
zx?zS$p&MA3ta;_Pm$nMFnYIeGo3;wDowoAplwf@lpe1cHo@iIEqa!eT{j$89dyECh
z|6}ps2YB)(eDxFm=6}@%&wu;TN4m}{zLc2;HlL~Y~(6ITaPo^)`i?iy)@Sdc_g~O$TunX@qjd@t5&m8|9{&L=EHW2*we@#Hw
z{ME#APhU+m2ir7Z-1AKn%`+u5Azd><6H9nDRvUw-!JaYvEPsrOGmE#Mzr#6pTT^5e;4ICC%Y7Pf8XUKB1r!<7^HoEy)hvCs19MM
z3vdrWU5;nyIe&vm9;DA*gIAX#0(FsK2e-i@72#(bOU7NjmmcY67wU)-7%J^ME}hsL
zL|**3*qFdkeQ*N8@Dq{CQr^xl8VExlSF&bZ{)&n<3UYjj_B2UI?uEPffhuc+hiklm
zQ>XCbJ&9lSanKOW>_Nd^c@QuuSiBEHa&W#zj+&XR@PCO2_)vRia}n+4yNozjhiZfg
z-@0X(=#PjvcrTVC{}Z2Qlt@3nJ!A{|N>aLKELk;YXXk22U3R9NbofkgCw(q6!AX&a
zJv-@hHh3p(E;E89Bt?R>-jp8odcv7p^b4oSnqVI94O_v(I@_|*2Amht1q@BF;0#)@
z1;=ce7Jq<2i?iSiSds-rSk$ISnH+5os&KXNLK}(&{59^~iJsstg&Z@r6-_AK)$iAg
zlGAUFp$$lKx|O5F1cm?$f@06!#|S}sJpP>@RXCbso*vhd6<6nxE*^7&B|&tHawZY^
zfUJCMy=qx72y#l0MoA{@Jy)NRL0g&pmQ#eZmqnos(_XwS0osxw?@!%tI^jux^P
zpz&);62u8C)_6R=dfKei>(O={DoIwrOea^~;(Vw-^C4vNQ^Mle?Wb9NO_zc)^V6G=
ztx~6}b-Bk}Na&Pa$@cZS?6UgadaGD|
zf5oi+Sf_5(Uv6NQ8#EQi+xkY+2<0G3ouy~!bX;|*EUd@(rp34dUmc-m^R5ofXlpnb
zP5ECU;{u)1x?gmKW*8?n$v*P$;&eB7MXFtq-)mM_e%(Fk2*KYqZyS*u|
zq6`W_wgf>*7`2FX3yh*9IhK2tinz37b$_?`u9@ck?8^Xh9Dgg>5^3saF2(@vumlC<
zFc)#T1;;5tauO&gINQLuXfaVgNDiskGMK;gYb*>%QD*7
zRj}zgxmFOpmw#(8p+tf`NmEQClcyTMC-cvJbr16w4hh9P&VXKkCnX!171h*DQH
zL^q^MiXz3aaa!U8wkx_g*0GNMQ5`7$C`FMYZSsOYiOj^JV
z&N7_dGxc}lVO(sgPe^__MyW{xxwcP4UR~Y!lGntsaJ=S_g%dKJY&a-&9x;*S^nd6Rn2idgDl}nQF4042bxmWkz?QST
zYF?4q}eXIn%Q%i8%SJCiT23nwDWncw{xTXp(LL$3+H9
zqj%SxY_UXp@X!0lR*n>cu}4U6qMFkLe~2K4{2)@63n34NT8}$6K{+Djj}cZmaAl=#
zgMZtRPad^JzSP?@wmr^q73qNzn5N)^ai>{+1!-R&1<=6I#*{DDd6Hk@O;JZnuIfi_
z8tZAmiCUi?5C({4hn$KKk_rVhg*>g=v#`
z^ds@^FD%q@UIZxD-R33Daha`U+O>~20DsImJi+hF6|IGV{$*Lyjq_fF1>hk!7Vf*?cImdQV&$yrq&
zq88)Q5mo)fj$l9i?09mHK2Xul}`=#&>Rg-}(ZO-Rw>x$YWS4YGCc9umem#1*95NFhZpOuu#MDNphcxQI)R3Ol3u&yZ_>X
zIOOd{A(rb*OV-bH9Y`&TiIM{u%YS%?d62mw8Ic4Bu+%uI49>Z8gyb6HC{^!kgxgB9
zLqe3eN&;OlgWDo!Wvj(ae}a}bF3i*FCfw~f-kFdWPDt+V@B54FHsqRg^MBj2%?VUJ
zNjrve?CWwTgi0iILSej_ej6f)SNX*1N&E4RruTZI-u^goJofZ^RZm(W34eO7Jg;r@
zBqBU1m}(R!jQ4$pcTbJbdgXh4(jtdCPAhaboxvp;hP-+cHg+!$KXD9%N1p4^r*}|J5Gy4JkK~af
z?tHNf=oTd%r(ZNEh7Zu3z<&;&FP}qtWcjTA1nN)btM%jJ+f(S+D$?@q7x4Hp{~iGq
zXP^}*e1PJmO!LnIBWc#EOQ=@RAtDz{h$W#)foK-#0ob1(jsZrh
zq~naH1_h30Z+GV3fnB34KA>xs>mcET6;c}!26E4_*t0AR>73>IYkyF6zQ(SPnstz_
zKQJyrN9jcR8i@Kv@vFGYvVfkd5piZkLmPMx&K(vf{qEudqiq8V^Z^YgPF~&MCRom&
z*zp$94Kqdx-N52x&8x7zv{kgtv{kI#v{i)dv{hKAMC+3XEoqzaM7w$&9g*4V*X7+j
zU@VmUKNb&RfTv)>bAOol_n{1ehi%$0{`sbj7MK#+kiHqAjU@sbYm6b#V1LgTVHQTjnZ>)$-{B|R
zW5j!d?nY7nKu;&4MU#1zpSx>K=Khk^9;uILwR7box(aA36*#X#u!GTdWRuss#p455
zeOo-)j-X%}LC#bB3>M=1yC~l|*`@gV`z|LHQTnIB0ORxPjR6rx
z^$0^5aWX%YH`zRz2=WFHYnb``T0DmhV>h5eVqWyfA73b?vtuPT<
zw;W<~h=_;xVibhG@p(o`%=6nrz94TnBYVcuRrBubTo0+w&XkvqkO}Uk&u1ohDGIP>
zFMZwy@1@OWMz9oTK;YJw(xX04IFn2M&Plq)kfXh2D|lFETQ=D!=LJjwOA{P8gBI+-
zv0J7CV1Ll!95@4(NpdadjS<;)r7?F(6w&nFRP#
zW#wb*Rm&=az^4Rhlw`_2s6IvpY)OXwq&ig=qkkECKAHc5JM)G9hLh2h
zKW7C*Z*Yh0=l1smEl2Ln>?i(ru;whj9xJT*nt#?lv9BXo1+SiHE6
quapy.classification.methods module
--
-class quapy.classification.methods.PCALR(n_components=100, **kwargs)
+-
+class quapy.classification.methods.LowRankLogisticRegression(n_components=100, **kwargs)
Bases: sklearn.base.BaseEstimator
-An example of a classification method that also generates embedded inputs, as those required for QuaNet.
-This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).
-
--
-fit(X, y)
-
-
-
--
-get_params()
-Get parameters for this estimator.
+An example of a classification method (i.e., an object that implements fit, predict, and predict_proba)
+that also generates embedded inputs (i.e., that implements transform), as those required for
+quapy.method.neural.QuaNet
. This is a mock method to allow for easily instantiating
+quapy.method.neural.QuaNet
on array-like real-valued instances.
+The transformation consists of applying sklearn.decomposition.TruncatedSVD
+while classification is performed using sklearn.linear_model.LogisticRegression
on the low-rank space
- Parameters
-deep (bool, default=True) – If True, will return the parameters for this estimator and
-contained subobjects that are estimators.
+-
+
+
+
+-
+fit(X, y)
+Fit the model according to the given training data. The fit consists of
+fitting TruncatedSVD and Logistic Regression.
+
+- Parameters
+
+X – array-like of shape (n_samples, n_features) with the instances
+y – array-like of shape (n_samples, n_classes) with the class labels
+
- Returns
-params – Parameter names mapped to their values.
-
-- Return type
-dict
+self
--
-predict(X)
-
+-
+get_params()
+Get hyper-parameters for this estimator
+
+- Returns
+a dictionary with parameter names mapped to their values
+
+
+
--
-predict_proba(X)
-
+-
+predict(X)
+Predicts labels for the instances X
+
+- Parameters
+X – array-like of shape (n_samples, n_features) instances to classify
+
+- Returns
+a numpy array of length n containing the label predictions, where n is the number of
+instances in X
+
+
+
--
-set_params(**params)
+-
+predict_proba(X)
+Predicts posterior probabilities for the instances X
+
+- Parameters
+X – array-like of shape (n_samples, n_features) instances to classify
+
+- Returns
+array-like of shape (n_samples, n_classes) with the posterior probabilities
+
+
+
+
+
+-
+set_params(**params)
Set the parameters of this estimator.
-The method works on simple estimators as well as on nested objects
-(such as Pipeline
). The latter have
-parameters of the form <component>__<parameter>
so that it’s
-possible to update each component of a nested object.
- Parameters
-**params (dict) – Estimator parameters.
-
-- Returns
-self – Estimator instance.
-
-- Return type
-estimator instance
+parameters – a **kwargs dictionary with the estimator parameters for
+Logistic Regression
+and, optionally, n_components for TruncatedSVD
--
-transform(X)
-
+-
+transform(X)
+Returns the low-rank approximation of X with n_components dimensions
+
+- Parameters
+X – array-like of shape (n_samples, n_features) instances to embed
+
+- Returns
+array-like of shape (n_samples, n_components) with the embedded instances
+
+
+
@@ -132,25 +171,63 @@ possible to update each component of a nested object.
class quapy.classification.neural.CNNnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5)
Bases: quapy.classification.neural.TextClassifierNet
-
--
-conv_block(input, conv_layer)
-
-
+An implementation of quapy.classification.neural.TextClassifierNet
based on
+Convolutional Neural Networks.
+
+- Parameters
+
+vocabulary_size – the size of the vocabulary
+n_classes – number of target classes
+embedding_size – the dimensionality of the word embeddings space (default 100)
+hidden_size – the dimensionality of the hidden space (default 256)
+repr_size – the dimensionality of the document embeddings space (default 100)
+kernel_heights – list of kernel lengths (default [3,5,7]), i.e., the number of
+consecutive tokens that each kernel covers
+stride – convolutional stride (default 1)
+padding – convolutional pad (default 0)
+drop_p – drop probability for dropout (default 0.5)
+
+
+
-
document_embedding(input)
-
+Embeds documents (i.e., performs the forward pass up to the
+next-to-last layer).
+
+- Parameters
+input – a batch of instances, typically generated by a torch’s DataLoader
+instance (see quapy.classification.neural.TorchDataset
)
+
+- Returns
+a torch tensor of shape (n_samples, n_dimensions), where
+n_samples is the number of documents, and n_dimensions is the
+dimensionality of the embedding
+
+
+
-
get_params()
-
+Get hyper-parameters for this estimator
+
+- Returns
+a dictionary with parameter names mapped to their values
+
+
+
-
property vocabulary_size
-
+Return the size of the vocabulary
+
+- Returns
+integer
+
+
+
@@ -158,25 +235,60 @@ possible to update each component of a nested object.
class quapy.classification.neural.LSTMnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, drop_p=0.5)
Bases: quapy.classification.neural.TextClassifierNet
+An implementation of quapy.classification.neural.TextClassifierNet
based on
+Long Short Term Memory networks.
+
+- Parameters
+
+vocabulary_size – the size of the vocabulary
+n_classes – number of target classes
+embedding_size – the dimensionality of the word embeddings space (default 100)
+hidden_size – the dimensionality of the hidden space (default 256)
+repr_size – the dimensionality of the document embeddings space (default 100)
+lstm_class_nlayers – number of LSTM layers (default 1)
+drop_p – drop probability for dropout (default 0.5)
+
+
+
-
document_embedding(x)
-
+Embeds documents (i.e., performs the forward pass up to the
+next-to-last layer).
+
+- Parameters
+x – a batch of instances, typically generated by a torch’s DataLoader
+instance (see quapy.classification.neural.TorchDataset
)
+
+- Returns
+a torch tensor of shape (n_samples, n_dimensions), where
+n_samples is the number of documents, and n_dimensions is the
+dimensionality of the embedding
+
+
+
-
get_params()
-
-
-
--
-init_hidden(set_size)
-
+Get hyper-parameters for this estimator
+
+- Returns
+a dictionary with parameter names mapped to their values
+
+
+
-
property vocabulary_size
-
+Return the size of the vocabulary
+
+- Returns
+integer
+
+
+
@@ -184,45 +296,135 @@ possible to update each component of a nested object.
class quapy.classification.neural.NeuralClassifierTrainer(net: quapy.classification.neural.TextClassifierNet, lr=0.001, weight_decay=0, patience=10, epochs=200, batch_size=64, batch_size_test=512, padding_length=300, device='cpu', checkpointpath='../checkpoint/classifier_net.dat')
Bases: object
+Trains a neural network for text classification.
+
+- Parameters
+
+net – an instance of TextClassifierNet implementing the forward pass
+lr – learning rate (default 1e-3)
+weight_decay – weight decay (default 0)
+patience – number of epochs that do not show any improvement in validation
+to wait before applying early stop (default 10)
+epochs – maximum number of training epochs (default 200)
+batch_size – batch size for training (default 64)
+batch_size_test – batch size for test (default 512)
+padding_length – maximum number of tokens to consider in a document (default 300)
+device – specify ‘cpu’ (default) or ‘cuda’ for enabling gpu
+checkpointpath – where to store the parameters of the best model found so far
+according to the evaluation in the held-out validation split (default ‘../checkpoint/classifier_net.dat’)
+
+
+
-
property device
-
+Gets the device in which the network is allocated
+
+- Returns
+device
+
+
+
-
fit(instances, labels, val_split=0.3)
-
+Fits the model according to the given training data.
+
+- Parameters
+
+instances – list of lists of indexed tokens
+labels – array-like of shape (n_samples, n_classes) with the class labels
+val_split – proportion of training documents to be taken as the validation set (default 0.3)
+
+
+- Returns
+-
+
+
+
-
get_params()
-
+Get hyper-parameters for this estimator
+
+- Returns
+a dictionary with parameter names mapped to their values
+
+
+
-
predict(instances)
-
+Predicts labels for the instances
+
+- Parameters
+instances – list of lists of indexed tokens
+
+- Returns
+a numpy array of length n containing the label predictions, where n is the number of
+instances in X
+
+
+
-
predict_proba(instances)
-
+Predicts posterior probabilities for the instances
+
+- Parameters
+instances – list of lists of indexed tokens
+
+- Returns
+array-like of shape (n_samples, n_classes) with the posterior probabilities
+
+
+
-
reset_net_params(vocab_size, n_classes)
-
+Reinitialize the network parameters
+
+- Parameters
+-
+
+
+
-
set_params(**params)
-
+Set the parameters of this trainer and the learner it is training.
+In this current version, parameter names for the trainer and learner should
+be disjoint.
+
+- Parameters
+params – a **kwargs dictionary with the parameters
+
+
+
-
transform(instances)
-
+Returns the embeddings of the instances
+
+- Parameters
+instances – list of lists of indexed tokens
+
+- Returns
+array-like of shape (n_samples, embed_size) with the embedded instances,
+where embed_size is defined by the classification network
+
+
+
@@ -230,49 +432,95 @@ possible to update each component of a nested object.
class quapy.classification.neural.TextClassifierNet
Bases: torch.nn.modules.module.Module
+Abstract Text classifier (torch.nn.Module)
-
dimensions()
-
+Gets the number of dimensions of the embedding space
+
+- Returns
+integer
+
+
+
-
abstract document_embedding(x)
-
+Embeds documents (i.e., performs the forward pass up to the
+next-to-last layer).
+
+- Parameters
+x – a batch of instances, typically generated by a torch’s DataLoader
+instance (see quapy.classification.neural.TorchDataset
)
+
+- Returns
+a torch tensor of shape (n_samples, n_dimensions), where
+n_samples is the number of documents, and n_dimensions is the
+dimensionality of the embedding
+
+
+
-
forward(x)
-Defines the computation performed at every call.
-Should be overridden by all subclasses.
-
-
Note
-
Although the recipe for forward pass needs to be defined within
-this function, one should call the Module
instance afterwards
-instead of this since the former takes care of running the
-registered hooks while the latter silently ignores them.
-
+Performs the forward pass.
+
+- Parameters
+x – a batch of instances, typically generated by a torch’s DataLoader
+instance (see quapy.classification.neural.TorchDataset
)
+
+- Returns
+a tensor of shape (n_instances, n_classes) with the decision scores
+for each of the instances and classes
+
+
-
abstract get_params()
-
+Get hyper-parameters for this estimator
+
+- Returns
+a dictionary with parameter names mapped to their values
+
+
+
-
predict_proba(x)
-
+Predicts posterior probabilities for the instances in x
+
+- Parameters
+x – a torch tensor of indexed tokens with shape (n_instances, pad_length)
+where n_instances is the number of instances in the batch, and pad_length
+is the length of the pad in the batch
+
+- Returns
+array-like of shape (n_samples, n_classes) with the posterior probabilities
+
+
+
-
property vocabulary_size
-
+Return the size of the vocabulary
+
+- Returns
+integer
+
+
+
-
xavier_uniform()
-
+Performs Xavier initialization of the network parameters
+
@@ -280,10 +528,36 @@ registered hooks while the latter silently ignores them.
class quapy.classification.neural.TorchDataset(instances, labels=None)
Bases: torch.utils.data.dataset.Dataset
+Transforms labelled instances into a torch.utils.data.DataLoader
object
+
+- Parameters
+
+instances – list of lists of indexed tokens
+labels – array-like of shape (n_samples, n_classes) with the class labels
+
+
+
-
asDataloader(batch_size, shuffle, pad_length, device)
-
+Converts the labelled collection into a Torch DataLoader with dynamic padding for
+the batch
+
+- Parameters
+
+batch_size – batch size
+shuffle – whether or not to shuffle instances
+pad_length – the maximum length for the list of tokens (dynamic padding is
+applied, meaning that if the longest document in the batch is shorter than
+pad_length, then the batch is padded up to its length, and not to pad_length)
+device – whether to allocate tensors in cpu or in cuda
+
+
+- Returns
+a torch.utils.data.DataLoader
object
+
+
+
@@ -294,38 +568,79 @@ registered hooks while the latter silently ignores them.
class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01')
Bases: sklearn.base.BaseEstimator
, sklearn.base.ClassifierMixin
+A wrapper for the SVM-perf package by Thorsten Joachims.
+When using losses for quantification, the source code has to be patched. See
+the installation documentation
+for further details.
+References:
+
+
+
+- Parameters
+
+svmperf_base – path to directory containing the binary files svm_perf_learn and svm_perf_classify
+C – trade-off between training error and margin (default 0.01)
+verbose – set to True to print svm-perf std outputs
+loss – the loss to optimize for. Available losses are “01”, “f1”, “kld”, “nkld”, “q”, “qacc”, “qf1”, “qgm”, “mae”, “mrae”.
+
+
+
-
decision_function(X, y=None)
-
+Evaluate the decision function for the samples in X.
+
+- Parameters
+-
+
+- Returns
+array-like of shape (n_samples,) containing the decision scores of the instances
+
+
+
-
fit(X, y)
-
+Trains the SVM for the multivariate performance loss
+
+- Parameters
+-
+
+- Returns
+self
+
+
+
-
predict(X)
-
+Predicts labels for the instances X
+:param X: array-like of shape (n_samples, n_features) instances to classify
+:return: a numpy array of length n containing the label predictions, where n is the number of instances in X
+
+
+
-
set_params(**parameters)
-Set the parameters of this estimator.
-The method works on simple estimators as well as on nested objects
-(such as Pipeline
). The latter have
-parameters of the form <component>__<parameter>
so that it’s
-possible to update each component of a nested object.
+Set the hyper-parameters for svm-perf. Currently, only the C parameter is supported
- Parameters
-**params (dict) – Estimator parameters.
-
-- Returns
-self – Estimator instance.
-
-- Return type
-estimator instance
+parameters – a **kwargs dictionary {‘C’: <float>}
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index b400270..b8a80a1 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["Datasets","Evaluation","Installation","Methods","Model-Selection","Plotting","index","modules","quapy","quapy.classification","quapy.data","quapy.method"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["Datasets.md","Evaluation.md","Installation.rst","Methods.md","Model-Selection.md","Plotting.md","index.rst","modules.rst","quapy.rst","quapy.classification.rst","quapy.data.rst","quapy.method.rst"],objects:{"":{quapy:[8,0,0,"-"]},"quapy.classification":{methods:[9,0,0,"-"],neural:[9,0,0,"-"],svmperf:[9,0,0,"-"]},"quapy.classification.methods":{PCALR:[9,1,1,""]},"quapy.classification.methods.PCALR":{fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural":{CNNnet:[9,1,1,""],LSTMnet:[9,1,1,""],NeuralClassifierTrainer:[9,1,1,""],TextClassifierNet:[9,1,1,""],TorchDataset:[9,1,1,""]},"quapy.classification.neural.CNNnet":{conv_block:[9,2,1,""],document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.LSTMnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],init_hidden:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.NeuralClassifierTrainer":{device:[9,3,1,""],fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],reset_net_params:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural.TextClassifierNet":{dimensions:[9,2,1,""],document_embedding:[9,2,1,""],forward:[9,2,1,""],get_params:[9,2,1,""],predict_proba:[9,2,1,""],vocabulary_size:[9,3,1,""],xavier_uniform:[9,2,1,""]},"quapy.classification.neural.TorchDataset":{asDataloader:[9,2,1,""]},"quapy.classification.svmperf":{SVMperf:[9,1,1,
""]},"quapy.classification.svmperf.SVMperf":{decision_function:[9,2,1,""],fit:[9,2,1,""],predict:[9,2,1,""],set_params:[9,2,1,""],valid_losses:[9,4,1,""]},"quapy.data":{base:[10,0,0,"-"],datasets:[10,0,0,"-"],preprocessing:[10,0,0,"-"],reader:[10,0,0,"-"]},"quapy.data.base":{Dataset:[10,1,1,""],LabelledCollection:[10,1,1,""],isbinary:[10,5,1,""]},"quapy.data.base.Dataset":{SplitStratified:[10,2,1,""],binary:[10,3,1,""],classes_:[10,3,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],stats:[10,2,1,""],vocabulary_size:[10,3,1,""]},"quapy.data.base.LabelledCollection":{Xy:[10,3,1,""],artificial_sampling_generator:[10,2,1,""],artificial_sampling_index_generator:[10,2,1,""],binary:[10,3,1,""],counts:[10,2,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],natural_sampling_generator:[10,2,1,""],natural_sampling_index_generator:[10,2,1,""],prevalence:[10,2,1,""],sampling:[10,2,1,""],sampling_from_index:[10,2,1,""],sampling_index:[10,2,1,""],split_stratified:[10,2,1,""],stats:[10,2,1,""],uniform_sampling:[10,2,1,""],uniform_sampling_index:[10,2,1,""]},"quapy.data.datasets":{df_replace:[10,5,1,""],fetch_UCIDataset:[10,5,1,""],fetch_UCILabelledCollection:[10,5,1,""],fetch_reviews:[10,5,1,""],fetch_twitter:[10,5,1,""],warn:[10,5,1,""]},"quapy.data.preprocessing":{IndexTransformer:[10,1,1,""],index:[10,5,1,""],reduce_columns:[10,5,1,""],standardize:[10,5,1,""],text2tfidf:[10,5,1,""]},"quapy.data.preprocessing.IndexTransformer":{add_word:[10,2,1,""],fit:[10,2,1,""],fit_transform:[10,2,1,""],index:[10,2,1,""],transform:[10,2,1,""],vocabulary_size:[10,2,1,""]},"quapy.data.reader":{binarize:[10,5,1,""],from_csv:[10,5,1,""],from_sparse:[10,5,1,""],from_text:[10,5,1,""],reindex_labels:[10,5,1,""]},"quapy.error":{absolute_error:[8,5,1,""],acc_error:[8,5,1,""],acce:[8,5,1,""],ae:[8,5,1,""],f1_error:[8,5,1,""],f1e:[8,5,1,""],from_name:[8,5,1,""],kld:[8,5,1,""],mae:[8,5,1,""],mean_absolute_error:[8,5,1,""],mean_relative_absolute_error:[8,5,1,""],mkld:[8,5,1,""],mnk
ld:[8,5,1,""],mrae:[8,5,1,""],mse:[8,5,1,""],nkld:[8,5,1,""],rae:[8,5,1,""],relative_absolute_error:[8,5,1,""],se:[8,5,1,""],smooth:[8,5,1,""]},"quapy.evaluation":{artificial_prevalence_prediction:[8,5,1,""],artificial_prevalence_protocol:[8,5,1,""],artificial_prevalence_report:[8,5,1,""],evaluate:[8,5,1,""],gen_prevalence_prediction:[8,5,1,""],natural_prevalence_prediction:[8,5,1,""],natural_prevalence_protocol:[8,5,1,""],natural_prevalence_report:[8,5,1,""]},"quapy.functional":{HellingerDistance:[8,5,1,""],adjusted_quantification:[8,5,1,""],artificial_prevalence_sampling:[8,5,1,""],get_nprevpoints_approximation:[8,5,1,""],normalize_prevalence:[8,5,1,""],num_prevalence_combinations:[8,5,1,""],prevalence_from_labels:[8,5,1,""],prevalence_from_probabilities:[8,5,1,""],prevalence_linspace:[8,5,1,""],strprev:[8,5,1,""],uniform_prevalence_sampling:[8,5,1,""],uniform_simplex_sampling:[8,5,1,""]},"quapy.method":{aggregative:[11,0,0,"-"],base:[11,0,0,"-"],meta:[11,0,0,"-"],neural:[11,0,0,"-"],non_aggregative:[11,0,0,"-"]},"quapy.method.aggregative":{ACC:[11,1,1,""],AdjustedClassifyAndCount:[11,4,1,""],AggregativeProbabilisticQuantifier:[11,1,1,""],AggregativeQuantifier:[11,1,1,""],CC:[11,1,1,""],ClassifyAndCount:[11,4,1,""],ELM:[11,1,1,""],EMQ:[11,1,1,""],ExpectationMaximizationQuantifier:[11,4,1,""],ExplicitLossMinimisation:[11,4,1,""],HDy:[11,1,1,""],HellingerDistanceY:[11,4,1,""],MAX:[11,1,1,""],MS2:[11,1,1,""],MS:[11,1,1,""],MedianSweep2:[11,4,1,""],MedianSweep:[11,4,1,""],OneVsAll:[11,1,1,""],PACC:[11,1,1,""],PCC:[11,1,1,""],ProbabilisticAdjustedClassifyAndCount:[11,4,1,""],ProbabilisticClassifyAndCount:[11,4,1,""],SVMAE:[11,1,1,""],SVMKLD:[11,1,1,""],SVMNKLD:[11,1,1,""],SVMQ:[11,1,1,""],SVMRAE:[11,1,1,""],T50:[11,1,1,""],ThresholdOptimization:[11,1,1,""],X:[11,1,1,""],training_helper:[11,5,1,""]},"quapy.method.aggregative.ACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""],solve_adjustment:[11,2,1,""]},"quapy.method.aggregative.AggregativeProbabilisticQu
antifier":{posterior_probabilities:[11,2,1,""],predict_proba:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.AggregativeQuantifier":{aggregate:[11,2,1,""],aggregative:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],learner:[11,3,1,""],n_classes:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.CC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ELM":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.EMQ":{EM:[11,2,1,""],EPSILON:[11,4,1,""],MAX_ITER:[11,4,1,""],aggregate:[11,2,1,""],fit:[11,2,1,""],predict_proba:[11,2,1,""]},"quapy.method.aggregative.HDy":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.MS":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.MS2":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.OneVsAll":{aggregate:[11,2,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],posterior_probabilities:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.PACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.PCC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ThresholdOptimization":{aggregate:[11,2,1,""],compute_fpr:[11,2,1,""],compute_table:[11,2,1,""],compute_tpr:[11,2,1,""],fit:[11,2,1,""],optimize_threshold:[11,2,1,""]},"quapy.method.base":{BaseQuantifier:[11,1,1,""],BinaryQuantifier:[11,1,1,""],isaggregative:[11,5,1,""],isbinary:[11,5,1,""],isprobabilistic:[11,5,1,""]},"quapy.method.base.BaseQuantifier":{aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.base.BinaryQuantifier":{binary:[11,3,1,""]},"quapy.method.meta":{EACC:[11,5,1,
""],ECC:[11,5,1,""],EEMQ:[11,5,1,""],EHDy:[11,5,1,""],EPACC:[11,5,1,""],Ensemble:[11,1,1,""],ensembleFactory:[11,5,1,""],get_probability_distribution:[11,5,1,""]},"quapy.method.meta.Ensemble":{VALID_POLICIES:[11,4,1,""],accuracy_policy:[11,2,1,""],aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],ds_policy:[11,2,1,""],ds_policy_get_posteriors:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],ptr_policy:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""],sout:[11,2,1,""]},"quapy.method.neural":{QuaNetModule:[11,1,1,""],QuaNetTrainer:[11,1,1,""],mae_loss:[11,5,1,""]},"quapy.method.neural.QuaNetModule":{device:[11,3,1,""],forward:[11,2,1,""],init_hidden:[11,2,1,""]},"quapy.method.neural.QuaNetTrainer":{classes_:[11,3,1,""],clean_checkpoint:[11,2,1,""],clean_checkpoint_dir:[11,2,1,""],epoch:[11,2,1,""],fit:[11,2,1,""],get_aggregative_estims:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.non_aggregative":{MaximumLikelihoodPrevalenceEstimation:[11,1,1,""]},"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation":{classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.model_selection":{GridSearchQ:[8,1,1,""]},"quapy.model_selection.GridSearchQ":{best_model:[8,2,1,""],classes_:[8,3,1,""],fit:[8,2,1,""],get_params:[8,2,1,""],quantify:[8,2,1,""],set_params:[8,2,1,""]},"quapy.plot":{binary_bias_bins:[8,5,1,""],binary_bias_global:[8,5,1,""],binary_diagonal:[8,5,1,""],error_by_drift:[8,5,1,""],save_or_show:[8,5,1,""]},"quapy.util":{EarlyStop:[8,1,1,""],create_if_not_exist:[8,5,1,""],create_parent_dir:[8,5,1,""],download_file:[8,5,1,""],download_file_if_not_exists:[8,5,1,""],get_quapy_home:[8,5,1,""],map_parallel:[8,5,1,""],parallel:[8,5,1,""],pickled_resource:[8,5,1,""],save_text_file:[8,5,1,""],temp_seed:[8,5,1,""]},quapy:{classification:[9,0,0,"-"],data:[10,0,0,"-"],error:[8,0,0,"-"],evaluation:[8,0,0,"-"],functional:[
8,0,0,"-"],isbinary:[8,5,1,""],method:[11,0,0,"-"],model_selection:[8,0,0,"-"],plot:[8,0,0,"-"],util:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","property","Python property"],"4":["py","attribute","Python attribute"],"5":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:property","4":"py:attribute","5":"py:function"},terms:{"0":[0,1,3,4,5,8,9,10,11],"00":[0,1,4,8],"000":1,"0001":[4,11],"000e":1,"001":[4,9,11],"009":1,"01":[8,9,11],"017":1,"018":0,"02":1,"021":0,"02552":4,"03":1,"034":1,"035":1,"037":1,"04":1,"041":1,"042":1,"046":1,"048":1,"05":[5,8],"055":1,"063":0,"065":0,"070":1,"073":1,"075":1,"078":0,"081":0,"082":[0,1],"083":0,"086":0,"091":1,"099":0,"1":[0,1,3,4,5,8,9,10,11],"10":[0,1,4,5,8,9,11],"100":[0,1,3,4,5,9,10,11],"1000":[0,4,11],"10000":4,"100000":4,"101":[4,10],"1010":4,"1024":11,"104":0,"108":1,"109":0,"11":[0,1,6],"11338":0,"114":1,"1145":[],"12":9,"120":0,"1215742":0,"1271":0,"13":[0,9],"139":0,"14":[3,11],"142":1,"146":[3,11],"1473":0,"148":0,"1484":0,"15":[3,8,11],"150":0,"153":0,"157":0,"158":0,"159":0,"1593":0,"1594":0,"1599":0,"161":0,"163":[0,1],"164":[0,3,11],"167":0,"17":0,"1771":1,"1775":[0,3],"1778":[0,3],"178":0,"1823":0,"1839":0,"18399":0,"1853":0,"19":[3,10,11],"193":0,"199151":0,"19982":4,"1st":0,"2":[0,1,3,5,8,10,11],"20":[5,8,11],"200":[1,9],"2000":0,"2002":[3,11],"2011":4,"2013":[3,11],"2015":[0,2,3,11],"2016":[3,10,11],"2017":[0,3,11],"2018":[0,3,10],"2019":[3,11],"2020":4,"20342":4,"206":0,"207":0,"208":0,"21":[1,3,5,8,11],"210":8,"211":0,"2126":0,"2155":0,"21591":0,"218":[3,11],"2184":0,"219e":1,"22":[0,3,9,10,11],"222":0,"222046":0,"226":0,"229":1,"229399":0,"23":9,"235":1,"238":0,"2390":0,"24":[0,9],"243":0,"248563":0,"24866":4,"24987":4,"25":[0,5,8,9,11],"25000":0,"256":[0,9],"26":9,"261":0,"265":0,"266":0,"267":0,"27":[1,3,9,11],"270":0,"2700406":[],"271":0,"272":0,"274":0,"275
":1,"27th":[0,3,10],"28":3,"280":0,"281":0,"282":0,"283":[0,1],"288":0,"289":0,"2971":0,"2nd":0,"2t":1,"2x5fcv":0,"3":[0,1,3,5,6,8,9,10,11],"30":[0,1,3,11],"300":[0,1,9],"305":0,"306":0,"312":0,"32":[0,6],"33":[0,5],"331":0,"333":0,"335":0,"337":0,"34":[0,3,11],"341":0,"346":1,"347":0,"350":0,"351":0,"357":1,"359":0,"361":0,"366":1,"372":0,"373":0,"376132":0,"3765":0,"3813":0,"3821":0,"383e":1,"387e":1,"392":0,"394":0,"399":0,"3f":[1,6],"3rd":0,"4":[0,1,3,4,5,8,11],"40":[0,3,4,11],"404333":0,"407":0,"41":[3,11],"412":0,"412e":1,"413":0,"414":0,"417":0,"41734":4,"42":[1,8],"421":0,"4259":0,"426e":1,"427":0,"430":0,"434":0,"435":1,"43676":4,"437":0,"44":0,"446":0,"45":[3,5,11],"452":0,"459":1,"4601":0,"461":0,"463":0,"465":0,"466":0,"470":0,"48":[3,11],"481":0,"48135":4,"486":0,"4898":0,"492":0,"496":0,"4960":1,"497":0,"5":[0,1,3,4,5,8,9,10,11],"50":[0,5,8,11],"500":[0,1,4,5,11],"5000":[1,5],"5005":4,"507":0,"508":0,"512":[9,11],"514":0,"515e":1,"530":0,"534":0,"535":0,"535e":1,"5379":4,"539":0,"541":1,"546":0,"5473":0,"54it":4,"55":5,"55it":4,"565":1,"569":0,"57":0,"573":0,"578":1,"583":0,"591":[3,11],"5f":4,"5fcv":11,"6":[0,1,3,5,8,10,11],"60":0,"600":1,"601":0,"604":[3,11],"606":0,"625":0,"627":0,"633e":1,"634":1,"64":[9,11],"640":0,"641":0,"650":0,"653":0,"654":1,"66":[1,11],"665":0,"667":0,"669":0,"67":5,"683":0,"688":0,"691":0,"694582":0,"7":[1,5,9],"70":0,"700":0,"701e":1,"711":0,"717":1,"725":1,"730":0,"735":0,"740e":1,"748":0,"75":[0,5,8],"762":0,"774":0,"778":0,"787":0,"794":0,"798":0,"8":[0,1,5,10,11],"8000":0,"830":0,"837":1,"858":1,"861":0,"87":[0,3,11],"8788":0,"889504":0,"8d2fhsgcvn0aaaaa":[],"9":[0,1,3,5,11],"90":[5,8],"901":0,"909":1,"914":1,"917":0,"919":0,"922":0,"923":0,"935":0,"936":0,"937":0,"945":1,"95":8,"9533":0,"958":0,"97":0,"979":0,"982":0,"99":8,"abstract":[3,9,11],"case":[0,1,3,4,5,8,11],"class":[0,1,3,4,5,6,8,9,10,11],"d\u00edez":[3,11],"default":[1,3,8,9,10],"do":[0,1,3,4,8],"final":[1,3,5],"float":[0,3,8,10,11],"function":[0,1,3,4,5,6,
7,9,11],"g\u00e1llego":[0,3,11],"gonz\u00e1lez":[3,11],"import":[0,1,3,4,5,6],"int":[0,5,8,10,11],"long":4,"new":[0,3,10,11],"p\u00e9rez":[0,3,11],"return":[0,1,3,4,5,8,9,10,11],"rodr\u0131":[3,11],"static":[3,11],"true":[0,1,3,4,5,6,8,9,10,11],"try":4,"while":[3,5,8,9,11],A:[0,3,8,10,11],As:[3,4],By:[1,3,8],For:[0,1,5,6,8,11],If:[3,5,8,9,11],In:[0,1,2,3,4,5,6,11],It:[3,4,5],One:[0,1,3,11],That:[1,4],The:[0,1,2,4,5,6,8,9,10,11],Then:3,These:0,To:[5,10],_:5,__:9,__class__:5,__name__:5,_adjust:[],_ae_:[],_classify_:11,_error_name_:11,_fit_learner_:11,_kld_:[],_labelledcollection_:11,_learner_:11,_mean:[],_min_df_:10,_nkld_:[],_posterior_probabilities_:11,_q_:[],_rae_:[],_svmperf_:[],ab:[],aboud:3,about:[0,5],abov:[0,3,5],absolut:[1,3,5,6],absolute_error:8,abstractmethod:3,acc:[1,3,5,6,8,11],acc_error:8,accept:3,access:[0,3],accommod:0,accord:[1,3,4,8],accordingli:5,accuraci:[1,5],accuracy_polici:11,achiev:[1,3,4,5],acm:[0,3,10,11],across:[0,1,4,5,6],action:[0,11],acut:0,ad:6,add:[3,4,8],add_word:10,addit:3,addition:[0,11],adjust:[3,6,11],adjusted_quantif:8,adjustedclassifyandcount:11,adopt:[3,4],advanc:[0,6],advantag:3,ae:[1,2,5,8],ae_:1,affect:8,afterward:[9,11],again:5,against:5,aggreg:[1,4,5,6,7,8],aggregativeprobabilisticquantifi:[3,11],aggregativequantifi:[3,11],aggregg:11,aim:[4,5],al:[0,2],alaiz:[3,11],alegr:[3,11],alejandro:4,alia:[3,11],all:[0,1,2,3,5,8,9,11],allia:3,allow:[0,1,2,3,5,8,10,11],almost:3,along:[0,3,11],alreadi:[3,11],also:[0,1,2,3,5,6,9],altern:4,although:[3,4,5,9,11],alwai:[3,4,5],among:3,an:[0,1,2,3,4,5,6,8,9,11],analys:[5,6],analysi:[0,3,6,9,10,11],analyz:5,ani:[0,1,3,4,5,6,8,10,11],anoth:[0,1,3,5],anyon:0,api:6,app:8,appeal:1,appear:5,append:5,appli:[2,3,4,5,8,10],appropri:4,approxim:[1,5,11],ar:[0,1,3,4,5,8,9,10,11],archive_filenam:8,archive_path:8,arg:[8,10,11],args_i:8,argu:4,argument:[0,1,3,5],arifici:8,aris:1,around:1,arrai:[1,3,5,8,10],articl:[3,4,11],artifici:[0,1,3,4,5,6,8],artificial_prevalence_predict:8,artificial_prevalence_protoc
ol:8,artificial_prevalence_report:8,artificial_prevalence_sampl:8,artificial_sampling_ev:[1,4],artificial_sampling_gener:[0,10],artificial_sampling_index_gener:10,artificial_sampling_predict:[1,5],artificial_sampling_report:1,arxiv:4,asarrai:1,asdataload:9,asonam:0,assess:4,assign:[3,8],associ:10,assum:[1,6,11],assumpt:[1,5,6],astyp:10,attempt:3,attribut:11,august:0,autom:[0,3,6],automat:[0,1],av:[3,11],avail:[0,1,2,3,5,6],averag:[1,3],avoid:1,axi:5,b:[0,10],balanc:[0,4],band:5,bar:8,barranquero:[2,3,11],base:[0,3,6,7,8,9],base_classifi:5,base_estim:3,base_quantifier_class:11,baseestim:[9,11],baselin:6,basequantifi:[3,8,11],basic:[5,11],batch_siz:9,batch_size_test:9,been:[0,3,4,5,10,11],befor:[3,11],behav:[3,5],being:[4,8],belief:1,belong:3,below:[0,2,3,5,10],best:[4,8,11],best_model:8,best_model_:4,best_params_:4,better:4,between:[4,5,6],beyond:5,bia:6,bias:5,bidirect:11,bin:[5,11],bin_bia:5,bin_diag:5,binar:[8,10],binari:[3,5,6,10,11],binary_bias_bin:[5,8],binary_bias_glob:[5,8],binary_diagon:[5,8],binary_quantifi:11,binaryquantifi:11,block:0,bool:[8,9,11],both:5,bound:8,box:5,breast:0,brief:1,broken:5,budg:1,budget:[1,4],build:11,bypass:11,c:[3,4,9,10,11],calibr:3,calibratedclassifi:3,calibratedclassifiercv:3,calibratedcv:11,call:[0,1,5,8,9,11],callabl:[0,8,10],can:[0,1,2,3,4,5,8],cancer:0,cannot:11,cardiotocographi:0,care:[9,11],carri:3,casa_token:[],castano:[3,11],castro:[3,11],categor:3,categori:1,cc:[3,5,11],ceil:8,center:5,chang:[0,1,3,11],character:[3,6],characteriz:[0,3,11],charg:[0,8],check:[3,4],checkpoint:[9,11],checkpointdir:11,checkpointnam:11,checkpointpath:9,choic:4,chosen:[4,8],cl:0,class2int:10,class_weight:4,classes_:[8,10,11],classif:[0,1,3,7,8,10,11],classif_posterior:[3,11],classif_predict:[3,11],classif_predictions_bin:11,classifi:[1,4,5,6,11],classifier_net:9,classifiermixin:9,classifyandcount:[3,11],classmethod:[0,10,11],classnam:10,clean_checkpoint:11,clean_checkpoint_dir:11,clear:5,clearer:1,clearli:5,clip:8,close:1,closer:1,cmc:0,cnn:3,c
nnnet:[3,9],code:[0,3,4,5],coincid:[0,6],col:[0,10],collect:[0,8,10],collet:10,color:[5,8],colormap:8,column:[0,10],com:[],combin:[0,1,4,8,9],combinatio:8,combinations_budget:8,come:0,commandlin:[],common:11,commonli:6,compar:[5,11],comparison:5,compil:[2,3],complet:[3,5],compon:9,compress:0,comput:[1,3,5,8,9,11],computation:4,compute_fpr:11,compute_t:11,compute_tpr:11,concept:6,concur:11,conduct:0,confer:[0,3,10],configur:[4,8],consid:[3,5,10],consist:[0,4,5,10],constrain:[1,5],constructor:3,consult:[0,1],contain:[1,2,3,5,8,9,10,11],contanin:8,content:7,context:8,contrast:1,control:[1,4],conv_block:9,conv_lay:9,convert:[1,3],copi:10,cornel:[],correct:11,correspond:[5,10],cost:1,costli:4,could:[0,1,3,4,5,6,11],count:[4,5,6,10,11],count_:[],counter:10,countvector:10,covari:10,cover:[1,4],coz:[0,3,11],cpu:[1,9],creat:[0,6,8],create_if_not_exist:8,create_parent_dir:8,crisp:3,criteria:4,cross:[3,11],cs:[],csr_matrix:10,csv:10,ctg:0,cuda:[3,11],cumbersom:1,curios:5,current:[3,8,10],custom:[3,6,8],customarili:[3,4],cv:[3,4],cyan:5,dat:[0,9],data:[1,3,4,5,6,7,8,9,11],data_hom:10,datafram:1,dataset:[1,3,4,5,6,7,8,9,11],dataset_nam:10,deal:0,decaesteck:[3,11],decim:1,decis:3,decision_funct:9,dedic:1,deep:[3,8,9,11],def:[0,1,3,5,8],defin:[0,3,8,9,11],degre:4,del:[0,3,11],delai:8,deliv:3,dens:0,depend:[0,1,4,5,8],describ:[3,11],descript:0,design:4,desir:[0,1],despit:1,detail:[0,1,3,6,11],determin:[1,4,5],detriment:5,devel:10,develop:[4,6],deviat:[0,1,5],devic:[0,3,5,9,11],df:[1,10],df_replac:10,diabet:0,diagon:6,dict:[8,9,10,11],dictionari:8,differ:[0,1,3,4,5,6,8,10],difficult:5,digit:0,dimens:[8,9,10],dimension:[8,10],directli:[0,1,3],directori:[2,10],discoveri:[3,11],discuss:5,displai:[1,5],distanc:11,distant:[1,8],distribut:[0,3,5,8,11],diverg:[1,3],dl:[],doabl:0,doc_embed:11,doc_embedding_s:11,doc_posterior:11,document:[0,1,3,5,10,11],document_embed:9,doe:[0,2,3,8],doi:[],done:3,dot:5,down:5,download:[0,2,3],download_fil:8,download_file_if_not_exist:8,drawn:[0,1,4],drift:6
,drop:11,drop_p:9,ds:[3,11],ds_polici:11,ds_policy_get_posterior:11,dtype:1,dump:10,dure:[1,5],dynam:[3,11],e:[0,1,3,4,5,6,8,10,11],eacc:11,each:[0,1,3,4,5,8,9,10,11],early_stop:11,earlystop:8,easili:[0,2,5],ecc:11,edu:[],eemq:11,effect:3,effici:3,ehdi:11,either:[1,3,8,11],element:3,elm:[3,11],em:11,embed:[3,9],embedding_s:9,empti:10,emq:[5,11],encod:10,end:[4,8],endeavour:6,enough:5,ensembl:[0,6,11],ensemblefactori:11,ensure_probabilist:11,entir:[0,3,4,5],environ:[1,3,4,5,8],ep:[1,8],epacc:11,epoch:[9,11],epsilon:[1,11],equal:[1,8],equidist:[0,8],equip:[3,5],err:8,err_drift:5,err_nam:8,error:[3,4,6,7],error_:[],error_by_drift:[5,8],error_funct:1,error_metr:[1,4,8],error_nam:[5,8,11],establish:8,estim:[1,3,5,6,8,9,11],estim_prev:[1,5,8],estim_preval:[3,6],esuli:[0,2,3,10,11],et:[0,2],etc:6,eval_budget:[4,8],evalu:[0,3,4,5,6,7],everi:[3,9,11],everyth:3,evinc:5,ex:[],exact:0,exactli:0,exampl:[0,1,3,4,5,8,9,11],exce:8,excel:0,except:[3,8],exemplifi:0,exhibit:[4,5],exist:8,expand_frame_repr:1,expect:6,expectationmaximizationquantifi:[3,11],experi:[1,2,3,4,5,8],explain:[1,5],explicitlossminim:11,explicitlossminimis:11,explor:[4,8],express:10,ext:2,extend:[2,3,11],extens:[0,2,5],extern:3,extract:[1,8],f1:[1,9],f1_error:8,f1e:[1,8],f:[0,1,3,4,5,6,10,11],fabrizio:4,facilit:6,fact:[3,5],fals:[1,3,5,8,9,10,11],famili:3,familiar:3,fast:8,faster:[0,10],feat1:10,feat2:10,featn:10,featur:0,feature_extract:10,fetch:[0,6],fetch_review:[0,1,3,4,5,10],fetch_twitt:[0,3,6,10],fetch_ucidataset:[0,3,10],fetch_ucilabelledcollect:[0,10],ff_layer:11,fhe:0,file:[0,5,10],fin:0,find:[0,4],finish:4,first:[0,1,2,3,5,8,10,11],fit:[1,3,4,5,6,8,9,10,11],fit_learn:[3,11],fit_transform:10,fix:[1,4],float64:1,fold:[3,11],folder:0,follow:[0,1,3,4,5,6],fomart:10,for_model_select:[0,10],form:[0,9],format:[0,5,10],former:[2,9,11],forward:[9,11],found:[0,3,4],four:3,fp:11,fpr:8,framework:6,frequenc:0,from:[0,1,3,4,5,6,8,10,11],from_csv:10,from_nam:[1,8],from_spars:10,from_text:10,full:1,fulli:0,func:8,furt
her:[0,1,3],fusion:[0,3,11],futur:3,g:[0,1,3,4,6,8,10,11],gao:[0,3,10,11],gasp:[0,10],gen:8,gen_data:5,gen_fn:8,gen_prevalence_predict:8,gener:[0,1,3,4,5,8,9,10,11],generation_func:8,german:0,get:[0,1,5,8,9],get_aggregative_estim:11,get_nprevpoints_approxim:[1,8],get_param:[3,8,9,11],get_probability_distribut:11,get_quapy_hom:8,github:[],given:[1,3,4,11],goe:4,good:[4,5],got:4,govern:1,grant:11,grid:[4,8,11],gridsearchcv:4,gridsearchq:[4,8],group:3,guarante:11,guez:[3,11],gzip:0,ha:[3,4,5],haberman:[0,3],handl:0,happen:[4,5],hard:3,harder:5,harri:0,have:[0,1,2,3,4,5,9,10,11],hcr:[0,3,10],hdy:[6,11],held:[3,4],helling:11,hellingerdist:8,hellingerdistancei:[3,11],help:5,here:1,hidden:5,hidden_s:9,hide:5,high:5,higher:[1,5],hlt:[],hold:6,home:10,hook:[9,11],how:[0,1,3,4,5,11],howev:[0,4,5,11],hp:[0,3,4,10],html:[],http:[],hyper:[4,8],hyperparam:4,hyperparamet:[3,8,11],i:[0,1,3,4,5,8,10,11],id:[0,3,10],idf:0,ieee:0,ignor:[8,9,10,11],iid:[1,5,6],illustr:[3,4,5],imdb:[0,5,10],implement:[0,1,3,4,5,6,11],impos:4,improv:3,includ:[0,1,3,5,6],inde:[3,4],index:[0,3,6,10],indextransform:10,indic:[0,1,3,4,5,8,10,11],individu:[1,3],infer:0,inform:[0,1,3,4,8,10,11],infrequ:10,inherit:3,init:3,init_hidden:[9,11],initi:0,inplac:[1,3,10],input:[3,5,8,9],insight:5,inspir:3,instal:[0,3,6],instanc:[0,3,4,5,6,8,9,10,11],instanti:[0,1,3,4],instead:[1,3,4,9,11],integ:[3,10],integr:6,interest:[1,5,6],interestingli:5,interfac:[0,1],intern:[0,3,10],interpret:[5,6],interv:[1,5,8],introduc:1,invok:[0,1,3,8,10],involv:[2,5],ionospher:0,iri:0,irrespect:5,isaggreg:11,isbinari:[8,10,11],isometr:5,isprobabilist:11,isti:[],item:8,iter:[0,8,11],its:[3,4],itself:[3,11],j:[0,3,11],joachim:3,job:[2,8],joblib:2,just:[1,3],k:[3,6,11],kei:8,kept:10,kernel_height:9,kfcv:[0,10,11],kindl:[0,1,3,5,10],kld:[1,2,8,9],know:3,knowledg:[0,3,10,11],known:[0,3,4],kullback:[1,3],kwarg:[9,10,11],l1:11,label:[0,3,4,5,6,8,9,10,11],labelledcollect:[0,3,4,8,10,11],larg:4,largest:8,last:[1,3,5],lastli:3,latex:5,latinn:[3,11],
latter:[9,11],layer:3,lead:1,learn:[1,2,3,4,6,8,11],learner:[3,4,11],least:[0,10],leav:10,legend:8,leibler:[1,3],less:[8,10],let:[1,3],level:11,leverag:3,like:[0,1,3,5],limit:[5,8],line:[1,3],linear:5,linear_model:[1,3,4,6],linearsvc:[3,5],linspac:5,list:[0,5,8,10],listedcolormap:8,literatur:[0,1,4,6],load:[0,3,8,10],loader:0,loader_func:[0,10],local:8,log:10,logist:[1,3,9,11],logisticregress:[1,3,4,6],logscal:8,logspac:4,longer:8,look:[0,1,3,5],loss:[6,9,11],low:5,lower:[5,8],lower_is_bett:8,lowest:5,lr:[1,3,9,11],lstm:3,lstm_class_nlay:9,lstm_hidden_s:11,lstm_nlayer:11,lstmnet:9,m:[3,8,11],machin:[1,4,6],made:[0,2,11],mae:[1,4,6,8,9,11],mae_loss:11,main:5,maintain:[3,11],make:[0,1,3],mammograph:0,manag:[0,3,10],mani:[1,3,4,5,6,11],manner:0,manual:0,map:[1,9],map_parallel:8,matplotlib:[2,8],matric:[0,5,10],matrix:5,max:11,max_it:11,max_sample_s:11,maxim:6,maximum:[1,8],maximumlikelihoodprevalenceestim:11,md:[],mean:[0,1,3,4,5,6,10,11],mean_absolute_error:8,mean_relative_absolute_error:8,measur:[2,3,4,5,6,11],mediansweep2:11,mediansweep:11,member:3,mention:3,merg:5,meta:[6,7,8],method:[0,1,4,5,6,7,8],method_data:5,method_nam:[5,8],metric:[1,3,4,6,8],might:1,min_df:[1,3,4,5,10],min_po:11,mine:[0,3,11],minim:8,minimum:10,minimun:10,mining6:10,mixtur:3,mkld:[1,8,11],mnkld:[1,8,11],modal:4,model:[0,1,5,6,8,11],model_select:[4,7],modifi:[3,8],modul:[0,1,3,5,6,7],moment:[0,3],more:[3,5,8],moreo:[0,3,4,10],most:[0,3,5,6,11],movi:0,mrae:[1,6,8,9,11],ms2:11,ms:11,mse:[1,3,6,8,11],msg:11,multiprocess:8,multivari:[3,11],must:3,my:[],my_arrai:8,my_custom_load:0,my_data:0,mycustomloss:3,n:[0,1,8],n_bin:[5,8],n_class:[1,3,8,9,10,11],n_compon:9,n_epoch:11,n_job:[1,3,4,8,10,11],n_preval:[0,8,10],n_prevpoint:[1,4,5,8],n_repeat:[1,8],n_repetit:[1,4,5,8],name:[5,8,9,10],nativ:6,natur:[1,8],natural_prevalence_predict:8,natural_prevalence_protocol:8,natural_prevalence_report:8,natural_sampling_gener:10,natural_sampling_index_gener:10,nbin:[5,8],ndarrai:[1,3,8,10,11],necessarili:11,need:
[0,3,9,11],neg:[0,5],nest:9,net:9,network:[0,10,11],neural:[0,7,8,10],neuralclassifiertrain:[3,9],neutral:0,next:[4,8],nfold:[0,10],nkld:[1,2,6,8,9],nn:[9,11],nogap:10,non:[3,11],non_aggreg:[7,8],none:[1,4,8,9,10,11],nonetheless:4,nor:3,normal:[0,1,3,11],normalize_preval:8,note:[1,3,4,5],now:5,nowadai:3,np:[1,3,4,5,8],npp:8,nprevpoint:8,nrepeat:[0,10],num_prevalence_combin:[1,8],number:[0,1,3,5,8,10,11],numer:[0,1,3,6,10],numpi:[2,4,8,11],o_l6x_pcf09mdetq4tu7jk98mxfbgsxp9zso14jkuiyudgfg0:[],object:[0,8,9,10,11],observ:1,obtain:[1,4],occur:[5,10],occurr:10,octob:[0,3],offer:[3,6],older:2,omd:[0,10],ommit:1,onc:[1,3,5,8],one:[0,1,3,4,5,8,9,11],ones:[1,3,5,8,10],onevsal:[3,11],onli:[0,3,5,8,11],open:[0,6],oper:3,opt:4,optim:[2,3,4,8,11],optimize_threshold:11,option:[0,1,3,5,8,10,11],order:[0,2,3,5,8,10,11],order_bi:11,org:[],orient:[3,6,8,11],origin:[0,3,10,11],os:0,other:[1,3,5,6,8],otherwis:[0,3,11],our:[],out:[3,4,5],outcom:5,outer:8,output:[0,1,3,4,11],over:[3,4],overal:1,overestim:5,overrid:3,overridden:[3,9,11],own:4,p:[0,3,8,11],p_hat:8,pacc:[1,3,5,11],packag:[0,2,3,6,7],pad:9,pad_length:9,padding_length:9,page:[0,2,6],pageblock:0,pair:0,panda:[1,2],paper:[0,3,11],parallel:[1,3,8],param:[4,8,9,10,11],param_grid:[4,8,11],param_mod_sel:11,param_model_sel:11,paramet:[1,3,4,8,9,10,11],part:[3,10],particular:[0,1,3],particularli:1,pass:[0,1,5,9,11],past:1,patch:[2,3],path:[0,3,5,8,10],patienc:[8,9,11],pattern:[3,11],pca:9,pcalr:9,pcc:[3,4,5,11],pd:1,pdf:5,peopl:[],perf:6,perform:[1,3,4,5,6,8,9,11],phonem:0,pick:4,pickl:[3,8,10],pickle_path:8,pickled_resourc:8,pii:[],pip:2,pipelin:9,pkl:8,plai:0,plan:3,pleas:3,plot:[6,7],png:5,point:[0,1,3,8],polici:[3,11],popular:6,portion:4,pos_class:[8,10],posit:[0,3,5],possibl:[1,3,8,9],posterior:[3,8,11],posterior_prob:[3,11],postpon:3,potter:0,pp:[0,3],practic:[0,4],pre:[0,3],prec:[0,8],precis:[0,1],preclassifi:3,predict:[3,4,5,8,9,11],predict_proba:[3,9,11],predictor:1,prepare_svmperf:[2,3],preprint:4,preprocess:[0,1,3,7,8],pre
sent:[0,3,10],preserv:[1,5],pretti:5,prev:[0,1,8,10],prevail:3,preval:[0,1,3,4,5,6,8,10,11],prevalence_estim:8,prevalence_from_label:8,prevalence_from_prob:8,prevalence_linspac:8,prevel:11,previou:3,previous:11,prevs_estim:11,prevs_hat:[1,8],princip:9,print:[0,1,3,4,6],prior:[1,3,4,5,6],priori:[3,11],probabilist:[3,11],probabilisticadjustedclassifyandcount:11,probabilisticclassifyandcount:11,probabl:[1,3,4,5,6,11],problem:[0,3,5,11],procedur:[3,6,11],proceed:[0,3,10],process:[3,4,8],processor:3,procol:1,produc:[0,1,5,8],product:3,progress:8,properli:0,properti:[3,8,9,10,11],proport:[3,4,8,11],propos:[2,3,11],protocl:8,protocol:[0,3,4,5,6,8],provid:[0,3,5,6],ptecondestim:11,ptr:[3,11],ptr_polici:11,purpos:[0,11],python:[0,6],pytorch:2,q:[0,2,3,8,9],qacc:9,qdrop_p:11,qf1:9,qgm:9,qp:[0,1,3,4,5,6,8],quanet:[2,6,9,11],quanetmodul:11,quanettrain:11,quantif:[0,1,6,8,10,11],quantifi:[3,4,5,6,8,11],quantification_error:8,quantiti:8,quapi:[0,1,2,3,4,5],quapy_data:0,quay_data:10,quevedo:[0,3,11],quick:[],r:[0,3,11],rae:[1,2,8],rais:[3,8],rand:8,random:[1,3,4,5,8],random_se:[1,8],random_st:10,randomli:0,rang:[0,5],rank:3,rare:10,rate:3,rather:[1,4],raw:10,rb:0,re:[3,4,10],read:10,reader:[7,8],readm:[],real:10,reason:[3,5,6],receiv:[0,3,5],recip:[9,11],recognit:[3,11],recommend:[1,5],recurr:[0,3,10],red:0,red_siz:[3,11],reduc:[0,10],reduce_column:[0,10],refer:10,refit:[4,8],regard:4,regist:[9,11],regress:9,regressor:[1,3,11],reindex_label:10,rel:[1,3],relative_absolute_error:8,reli:[1,3],reliabl:[3,11],rememb:5,remov:10,repeat:[8,10],repetit:8,repl:10,replac:[0,3,10],replic:[1,4,8],report:1,repositori:0,repr_siz:9,repres:[1,3,5,10,11],represent:[0,3],request:[0,8,11],requir:[0,1,3,6,9],reset_net_param:9,resourc:8,respect:[0,1,5,11],respond:3,rest:[10,11],result:[1,2,3,4,5,6,11],retain:[0,3],retrain:4,return_constrained_dim:8,reus:[0,3,8],review:[5,6,10],reviews_sentiment_dataset:0,rewrit:5,right:4,role:0,root:6,roughli:0,routin:8,row:10,run:[0,1,2,3,4,5,8,9,11],s003132031400291x
:[],s:[0,1,3,4,5,8,9,10],saeren:[3,11],sai:11,said:3,same:[0,3,5,10],sampl:[0,1,3,4,5,6,8,10,11],sample_s:[0,1,3,4,5,8,10,11],sampling_from_index:[0,10],sampling_index:[0,10],sander:[0,10],save:[5,8],save_or_show:8,save_text_fil:8,savepath:[5,8],scall:10,scenario:[1,3,4,5,6],scienc:[3,11],sciencedirect:[],scikit:[2,3,4],scipi:[2,10],score:[0,1,4,10],script:[1,2,3,6],se:[1,8],search:[3,4,6,8,11],sebastiani:[0,3,4,10,11],second:[0,1,3,5,8],section:4,see:[0,1,2,3,4,5,6],seed:[1,4,8],seem:3,seemingli:5,seen:5,select:[0,3,6,8,11],selector:3,self:[3,9,10,11],semeion:0,semev:0,semeval13:[0,10],semeval14:[0,10],semeval15:[0,10],semeval16:[0,6,10],sentenc:10,sentiment:[3,6,10,11],separ:[8,10],seri:0,serv:3,set:[0,1,3,4,5,6,8,9,10,11],set_opt:1,set_param:[3,8,9,11],set_siz:9,sever:0,sh:[2,3],shape:[5,8],share:[0,10],shift:[1,4,6,8],shoud:3,should:[0,1,3,4,5,6,9,10,11],show:[0,1,3,4,5,8,10],show_std:[5,8],showcas:5,shown:[1,5],shuffl:[9,10],signific:1,silent:[8,9,11],similar:11,simpl:[0,3,5,9,11],simplest:3,simplex:[0,8],simpli:[1,2,3,4,5,6,9,11],sinc:[0,1,3,5,8,9,11],singl:[1,3,6,11],size:[0,1,3,8,10,11],sklearn:[1,3,4,5,6,9,10,11],sld:3,slice:8,smooth:[1,8],smooth_limits_epsilon:8,so:[0,1,3,5,8,9,11],social:[0,3,10,11],soft:3,softwar:0,solid:5,solv:4,solve_adjust:11,some:[0,1,3,5],some_arrai:8,sometim:1,sonar:0,sourc:[2,3,6],sout:11,space:[0,4],spambas:0,spars:[0,10],special:[0,5,10],specif:[3,4],specifi:[0,1,3,5,8,10,11],spectf:0,spectrum:[0,1,4,5],speed:3,split:[0,3,4,5,10,11],split_stratifi:10,splitstratifi:10,spmatrix:10,squar:[1,3],sst:[0,10],stabil:1,standard:[0,1,5,10],start:4,stat:10,state:8,statist:[0,1,11],stats_siz:11,stdout:8,step:[5,8],store:[0,10],str:[0,8,10],strategi:[3,4],stratifi:[0,3],stride:9,string:[1,8,10],strongli:[4,5],strprev:[0,1,8],structur:3,studi:[0,3,11],subclass:[9,11],subinterv:5,sublinear_tf:10,submit:0,submodul:7,subobject:9,suboptim:4,subpackag:7,subsequ:[10,11],subtract:[0,8],subtyp:10,suffic:5,suffici:11,sum:11,summar:0,supervis:[4,6],sup
port:[3,6],surpass:1,svm:[3,5,6],svm_light:[],svm_perf:[],svm_perf_quantif:[2,3],svmae:[3,11],svmkld:[3,11],svmnkld:[3,11],svmperf:[2,3,7,8],svmperf_bas:[9,11],svmperf_hom:3,svmq:[3,11],svmrae:[3,11],syntax:5,system:4,t50:11,t:[0,1,3],take:[0,3,5,8,9,11],taken:3,target:[3,5,6,8,11],task:[3,4,11],temp_se:8,tempor:8,tend:5,tendenc:5,term:[0,1,3,4,5,6,10,11],test:[0,1,3,4,5,6,8,10,11],test_bas:[],test_dataset:[],test_method:[],test_path:[0,10],test_sampl:8,test_split:10,text2tfidf:[0,1,3,10],text:[0,3,8,10,11],textclassifiernet:9,textual:[0,6,10],tf:[0,10],tfidf:[0,4,5,10],tfidfvector:10,than:[1,4,5,8,10],thei:[0,3],them:[0,3,9,11],theoret:4,thereaft:1,thi:[0,1,2,3,4,5,6,8,9,11],thing:3,third:[1,5],those:[1,3,4,5,8,9],though:3,three:[0,5],thresholdoptim:11,through:[3,8],thu:[3,4,5,11],tictacto:0,time:[0,1,3,8,10],timeout:8,timeouterror:8,timer:8,titl:8,tj:[],tn:11,token:[0,10],tool:[1,6],top:[3,11],torch:[3,9,11],torchdataset:9,toward:5,tp:11,tpr:8,tqdm:2,tr_iter_per_poch:11,tr_prev:[5,8,11],tradition:1,train:[0,1,3,4,5,6,8,10,11],train_path:[0,10],train_prev:[5,8],train_prop:10,train_siz:10,train_val_split:11,training_help:11,training_preval:5,training_s:5,transact:[3,11],transform:[0,9,10],transfus:0,trivial:3,true_prev:[1,5,8],true_preval:6,turn:4,tweet:[0,3,10,11],twitter:[6,10],twitter_sentiment_datasets_test:0,twitter_sentiment_datasets_train:0,two:[0,1,3,4,5,8],type:[0,3,9],typic:[1,4,5],uci:6,unabl:0,unadjust:5,unbias:5,uncompress:0,under:1,underestim:5,unfortun:5,unifi:0,uniform_prevalence_sampl:8,uniform_sampl:10,uniform_sampling_index:10,uniform_simplex_sampl:8,uniformli:8,union:[8,11],uniqu:10,unit:0,unix:0,unk:10,unless:11,unlik:[1,4],unus:[8,11],up:[3,4,8,11],updat:9,url:8,us:[0,1,3,4,5,6,8,10,11],user:[0,1,5],utf:10,util:[7,9],v:[3,11],va_iter_per_poch:11,val:[0,10],val_split:[3,4,8,9,11],valid:[0,1,3,4,5,8,10,11],valid_loss:[3,9],valid_polici:11,valu:[0,1,3,8,9,10,11],variabl:[1,3,5,8],varianc:[0,5],variant:[5,6,11],varieti:4,variou:[1,5],vector:[0,10],
verbos:[0,1,4,8,9,10,11],veri:[3,5],versatil:6,version:2,vertical_xtick:8,via:[0,2,3,11],view:5,visual:[5,6],vocab_s:9,vocabulari:10,vocabulary_s:[3,9,10],vs:3,w:[0,3,10,11],wa:[0,3,5,10,11],wai:[1,11],want:[3,4],warn:10,wb:[0,10],wdbc:0,we:[0,1,3,4,5,6],weight:10,weight_decai:9,well:[0,3,4,5,9],were:0,what:3,when:[0,1,3,4,5,8],whenev:[5,8],where:[3,5,8,10,11],wherebi:4,whether:[8,10,11],which:[0,1,3,4,5,8,10,11],white:0,whole:[0,1,3,4,8],why:3,wide:5,wiki:[0,3],wine:0,within:[8,9,11],without:[1,3,8],word:[1,3,6,10],work:[1,3,4,5,9],worker:1,wors:[4,5],would:[0,1,3,5,6,8,11],wrapper:8,written:6,www:[],x:[5,8,9,10,11],xavier_uniform:9,xlrd:[0,2],xy:10,y:[5,9,10,11],y_:11,y_pred:8,y_true:8,ye:10,yeast:0,yield:[5,8],you:[2,3],your:3,z:0,zero:0,zfthyovrzwxmgfzylqw_y8cagg:[],zip:[0,5]},titles:["Datasets","Evaluation","Installation","Quantification Methods","Model Selection","Plotting","Welcome to QuaPy\u2019s documentation!","quapy","quapy package","quapy.classification package","quapy.data package","quapy.method package"],titleterms:{"function":8,A:6,The:3,ad:0,aggreg:[3,11],base:[10,11],bia:5,classif:[4,9],classifi:3,content:[6,8,9,10,11],count:3,custom:0,data:[0,10],dataset:[0,10],diagon:5,distanc:3,document:6,drift:5,emq:3,ensembl:3,error:[1,5,8],evalu:[1,8],ex:[],exampl:6,expect:3,explicit:3,featur:6,get:[],hdy:3,helling:3,indic:6,instal:2,introduct:6,issu:0,learn:0,loss:[2,3,4],machin:0,maxim:3,measur:1,meta:[3,11],method:[3,9,11],minim:3,model:[3,4],model_select:8,modul:[8,9,10,11],network:3,neural:[3,9,11],non_aggreg:11,orient:[2,4],packag:[8,9,10,11],perf:2,plot:[5,8],preprocess:10,process:0,protocol:1,quanet:3,quantif:[2,3,4,5],quapi:[6,7,8,9,10,11],quick:6,reader:10,readm:[],requir:2,review:0,s:6,select:4,sentiment:0,start:[],submodul:[8,9,10,11],subpackag:8,svm:2,svmperf:9,tabl:6,target:4,test:[],test_bas:[],test_dataset:[],test_method:[],titl:[],twitter:0,uci:0,util:8,variant:3,welcom:6,y:3}})
\ No newline at end of file
+Search.setIndex({docnames:["Datasets","Evaluation","Installation","Methods","Model-Selection","Plotting","index","modules","quapy","quapy.classification","quapy.data","quapy.method"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["Datasets.md","Evaluation.md","Installation.rst","Methods.md","Model-Selection.md","Plotting.md","index.rst","modules.rst","quapy.rst","quapy.classification.rst","quapy.data.rst","quapy.method.rst"],objects:{"":{quapy:[8,0,0,"-"]},"quapy.classification":{methods:[9,0,0,"-"],neural:[9,0,0,"-"],svmperf:[9,0,0,"-"]},"quapy.classification.methods":{LowRankLogisticRegression:[9,1,1,""]},"quapy.classification.methods.LowRankLogisticRegression":{fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural":{CNNnet:[9,1,1,""],LSTMnet:[9,1,1,""],NeuralClassifierTrainer:[9,1,1,""],TextClassifierNet:[9,1,1,""],TorchDataset:[9,1,1,""]},"quapy.classification.neural.CNNnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.LSTMnet":{document_embedding:[9,2,1,""],get_params:[9,2,1,""],vocabulary_size:[9,3,1,""]},"quapy.classification.neural.NeuralClassifierTrainer":{device:[9,3,1,""],fit:[9,2,1,""],get_params:[9,2,1,""],predict:[9,2,1,""],predict_proba:[9,2,1,""],reset_net_params:[9,2,1,""],set_params:[9,2,1,""],transform:[9,2,1,""]},"quapy.classification.neural.TextClassifierNet":{dimensions:[9,2,1,""],document_embedding:[9,2,1,""],forward:[9,2,1,""],get_params:[9,2,1,""],predict_proba:[9,2,1,""],vocabulary_size:[9,3,1,""],xavier_uniform:[9,2,1,""]},"quapy.classification.neural.TorchDataset":{asDataloader:[9,2,1,""]},"quapy.classification.svmperf":{SVMperf:[9,1,1,""]},
"quapy.classification.svmperf.SVMperf":{decision_function:[9,2,1,""],fit:[9,2,1,""],predict:[9,2,1,""],set_params:[9,2,1,""],valid_losses:[9,4,1,""]},"quapy.data":{base:[10,0,0,"-"],datasets:[10,0,0,"-"],preprocessing:[10,0,0,"-"],reader:[10,0,0,"-"]},"quapy.data.base":{Dataset:[10,1,1,""],LabelledCollection:[10,1,1,""],isbinary:[10,5,1,""]},"quapy.data.base.Dataset":{SplitStratified:[10,2,1,""],binary:[10,3,1,""],classes_:[10,3,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],stats:[10,2,1,""],vocabulary_size:[10,3,1,""]},"quapy.data.base.LabelledCollection":{Xy:[10,3,1,""],artificial_sampling_generator:[10,2,1,""],artificial_sampling_index_generator:[10,2,1,""],binary:[10,3,1,""],counts:[10,2,1,""],kFCV:[10,2,1,""],load:[10,2,1,""],n_classes:[10,3,1,""],natural_sampling_generator:[10,2,1,""],natural_sampling_index_generator:[10,2,1,""],prevalence:[10,2,1,""],sampling:[10,2,1,""],sampling_from_index:[10,2,1,""],sampling_index:[10,2,1,""],split_stratified:[10,2,1,""],stats:[10,2,1,""],uniform_sampling:[10,2,1,""],uniform_sampling_index:[10,2,1,""]},"quapy.data.datasets":{df_replace:[10,5,1,""],fetch_UCIDataset:[10,5,1,""],fetch_UCILabelledCollection:[10,5,1,""],fetch_reviews:[10,5,1,""],fetch_twitter:[10,5,1,""],warn:[10,5,1,""]},"quapy.data.preprocessing":{IndexTransformer:[10,1,1,""],index:[10,5,1,""],reduce_columns:[10,5,1,""],standardize:[10,5,1,""],text2tfidf:[10,5,1,""]},"quapy.data.preprocessing.IndexTransformer":{add_word:[10,2,1,""],fit:[10,2,1,""],fit_transform:[10,2,1,""],index:[10,2,1,""],transform:[10,2,1,""],vocabulary_size:[10,2,1,""]},"quapy.data.reader":{binarize:[10,5,1,""],from_csv:[10,5,1,""],from_sparse:[10,5,1,""],from_text:[10,5,1,""],reindex_labels:[10,5,1,""]},"quapy.error":{absolute_error:[8,5,1,""],acc_error:[8,5,1,""],acce:[8,5,1,""],ae:[8,5,1,""],f1_error:[8,5,1,""],f1e:[8,5,1,""],from_name:[8,5,1,""],kld:[8,5,1,""],mae:[8,5,1,""],mean_absolute_error:[8,5,1,""],mean_relative_absolute_error:[8,5,1,""],mkld:[8,5,1,""],mnkld:[8
,5,1,""],mrae:[8,5,1,""],mse:[8,5,1,""],nkld:[8,5,1,""],rae:[8,5,1,""],relative_absolute_error:[8,5,1,""],se:[8,5,1,""],smooth:[8,5,1,""]},"quapy.evaluation":{artificial_prevalence_prediction:[8,5,1,""],artificial_prevalence_protocol:[8,5,1,""],artificial_prevalence_report:[8,5,1,""],evaluate:[8,5,1,""],gen_prevalence_prediction:[8,5,1,""],natural_prevalence_prediction:[8,5,1,""],natural_prevalence_protocol:[8,5,1,""],natural_prevalence_report:[8,5,1,""]},"quapy.functional":{HellingerDistance:[8,5,1,""],adjusted_quantification:[8,5,1,""],artificial_prevalence_sampling:[8,5,1,""],get_nprevpoints_approximation:[8,5,1,""],normalize_prevalence:[8,5,1,""],num_prevalence_combinations:[8,5,1,""],prevalence_from_labels:[8,5,1,""],prevalence_from_probabilities:[8,5,1,""],prevalence_linspace:[8,5,1,""],strprev:[8,5,1,""],uniform_prevalence_sampling:[8,5,1,""],uniform_simplex_sampling:[8,5,1,""]},"quapy.method":{aggregative:[11,0,0,"-"],base:[11,0,0,"-"],meta:[11,0,0,"-"],neural:[11,0,0,"-"],non_aggregative:[11,0,0,"-"]},"quapy.method.aggregative":{ACC:[11,1,1,""],AdjustedClassifyAndCount:[11,4,1,""],AggregativeProbabilisticQuantifier:[11,1,1,""],AggregativeQuantifier:[11,1,1,""],CC:[11,1,1,""],ClassifyAndCount:[11,4,1,""],ELM:[11,1,1,""],EMQ:[11,1,1,""],ExpectationMaximizationQuantifier:[11,4,1,""],ExplicitLossMinimisation:[11,4,1,""],HDy:[11,1,1,""],HellingerDistanceY:[11,4,1,""],MAX:[11,1,1,""],MS2:[11,1,1,""],MS:[11,1,1,""],MedianSweep2:[11,4,1,""],MedianSweep:[11,4,1,""],OneVsAll:[11,1,1,""],PACC:[11,1,1,""],PCC:[11,1,1,""],ProbabilisticAdjustedClassifyAndCount:[11,4,1,""],ProbabilisticClassifyAndCount:[11,4,1,""],SVMAE:[11,1,1,""],SVMKLD:[11,1,1,""],SVMNKLD:[11,1,1,""],SVMQ:[11,1,1,""],SVMRAE:[11,1,1,""],T50:[11,1,1,""],ThresholdOptimization:[11,1,1,""],X:[11,1,1,""],training_helper:[11,5,1,""]},"quapy.method.aggregative.ACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""],solve_adjustment:[11,2,1,""]},"quapy.method.aggregative.AggregativeProbabilisticQuantif
ier":{posterior_probabilities:[11,2,1,""],predict_proba:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.AggregativeQuantifier":{aggregate:[11,2,1,""],aggregative:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],learner:[11,3,1,""],n_classes:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.CC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ELM":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.EMQ":{EM:[11,2,1,""],EPSILON:[11,4,1,""],MAX_ITER:[11,4,1,""],aggregate:[11,2,1,""],fit:[11,2,1,""],predict_proba:[11,2,1,""]},"quapy.method.aggregative.HDy":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.MS":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.MS2":{optimize_threshold:[11,2,1,""]},"quapy.method.aggregative.OneVsAll":{aggregate:[11,2,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],classify:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],posterior_probabilities:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.aggregative.PACC":{aggregate:[11,2,1,""],classify:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.PCC":{aggregate:[11,2,1,""],fit:[11,2,1,""]},"quapy.method.aggregative.ThresholdOptimization":{aggregate:[11,2,1,""],compute_fpr:[11,2,1,""],compute_table:[11,2,1,""],compute_tpr:[11,2,1,""],fit:[11,2,1,""],optimize_threshold:[11,2,1,""]},"quapy.method.base":{BaseQuantifier:[11,1,1,""],BinaryQuantifier:[11,1,1,""],isaggregative:[11,5,1,""],isbinary:[11,5,1,""],isprobabilistic:[11,5,1,""]},"quapy.method.base.BaseQuantifier":{aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.base.BinaryQuantifier":{binary:[11,3,1,""]},"quapy.method.meta":{EACC:[11,5,1,""],E
CC:[11,5,1,""],EEMQ:[11,5,1,""],EHDy:[11,5,1,""],EPACC:[11,5,1,""],Ensemble:[11,1,1,""],ensembleFactory:[11,5,1,""],get_probability_distribution:[11,5,1,""]},"quapy.method.meta.Ensemble":{VALID_POLICIES:[11,4,1,""],accuracy_policy:[11,2,1,""],aggregative:[11,3,1,""],binary:[11,3,1,""],classes_:[11,3,1,""],ds_policy:[11,2,1,""],ds_policy_get_posteriors:[11,2,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],probabilistic:[11,3,1,""],ptr_policy:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""],sout:[11,2,1,""]},"quapy.method.neural":{QuaNetModule:[11,1,1,""],QuaNetTrainer:[11,1,1,""],mae_loss:[11,5,1,""]},"quapy.method.neural.QuaNetModule":{device:[11,3,1,""],forward:[11,2,1,""],init_hidden:[11,2,1,""]},"quapy.method.neural.QuaNetTrainer":{classes_:[11,3,1,""],clean_checkpoint:[11,2,1,""],clean_checkpoint_dir:[11,2,1,""],epoch:[11,2,1,""],fit:[11,2,1,""],get_aggregative_estims:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.method.non_aggregative":{MaximumLikelihoodPrevalenceEstimation:[11,1,1,""]},"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation":{classes_:[11,3,1,""],fit:[11,2,1,""],get_params:[11,2,1,""],quantify:[11,2,1,""],set_params:[11,2,1,""]},"quapy.model_selection":{GridSearchQ:[8,1,1,""]},"quapy.model_selection.GridSearchQ":{best_model:[8,2,1,""],classes_:[8,3,1,""],fit:[8,2,1,""],get_params:[8,2,1,""],quantify:[8,2,1,""],set_params:[8,2,1,""]},"quapy.plot":{binary_bias_bins:[8,5,1,""],binary_bias_global:[8,5,1,""],binary_diagonal:[8,5,1,""],error_by_drift:[8,5,1,""],save_or_show:[8,5,1,""]},"quapy.util":{EarlyStop:[8,1,1,""],create_if_not_exist:[8,5,1,""],create_parent_dir:[8,5,1,""],download_file:[8,5,1,""],download_file_if_not_exists:[8,5,1,""],get_quapy_home:[8,5,1,""],map_parallel:[8,5,1,""],parallel:[8,5,1,""],pickled_resource:[8,5,1,""],save_text_file:[8,5,1,""],temp_seed:[8,5,1,""]},quapy:{classification:[9,0,0,"-"],data:[10,0,0,"-"],error:[8,0,0,"-"],evaluation:[8,0,0,"-"],functional:[8,0,0
,"-"],isbinary:[8,5,1,""],method:[11,0,0,"-"],model_selection:[8,0,0,"-"],plot:[8,0,0,"-"],util:[8,0,0,"-"]}},objnames:{"0":["py","module","Python module"],"1":["py","class","Python class"],"2":["py","method","Python method"],"3":["py","property","Python property"],"4":["py","attribute","Python attribute"],"5":["py","function","Python function"]},objtypes:{"0":"py:module","1":"py:class","2":"py:method","3":"py:property","4":"py:attribute","5":"py:function"},terms:{"0":[0,1,3,4,5,8,9,10,11],"00":[0,1,4,8],"000":1,"0001":[4,11],"000e":1,"001":[4,9,11],"009":1,"01":[8,9,11],"017":1,"018":0,"02":1,"021":0,"02552":4,"03":1,"034":1,"035":1,"037":1,"04":1,"041":1,"042":1,"046":1,"048":1,"05":[5,8],"055":1,"063":0,"065":0,"070":1,"073":1,"075":1,"078":0,"081":0,"082":[0,1],"083":0,"086":0,"091":1,"099":0,"1":[0,1,3,4,5,8,9,10,11],"10":[0,1,4,5,8,9,11],"100":[0,1,3,4,5,9,10,11],"1000":[0,4,11],"10000":4,"100000":4,"101":[4,10],"1010":4,"1024":11,"104":0,"108":1,"109":0,"11":[0,1,6],"11338":0,"114":1,"1145":[],"12":9,"120":0,"1215742":0,"1271":0,"13":[0,9],"139":0,"14":[3,11],"142":1,"146":[3,11],"1473":0,"148":0,"1484":0,"15":[3,8,11],"150":0,"153":0,"157":0,"158":0,"159":0,"1593":0,"1594":0,"1599":0,"161":0,"163":[0,1],"164":[0,3,11],"167":0,"17":0,"1771":1,"1775":[0,3],"1778":[0,3],"178":0,"1823":0,"1839":0,"18399":0,"1853":0,"19":[3,10,11],"193":0,"199151":0,"19982":4,"1e":9,"1st":0,"2":[0,1,3,5,8,10,11],"20":[5,8,11],"200":[1,9],"2000":0,"2002":[3,11],"2011":4,"2013":[3,11],"2015":[0,2,3,9,11],"2016":[3,10,11],"2017":[0,3,11],"2018":[0,3,10],"2019":[3,11],"2020":4,"20342":4,"206":0,"207":0,"208":0,"21":[1,3,5,8,11],"210":8,"211":0,"2126":0,"2155":0,"21591":0,"218":[3,11],"2184":0,"219e":1,"22":[0,3,9,10,11],"222":0,"222046":0,"226":0,"229":1,"229399":0,"23":9,"235":1,"238":0,"2390":0,"24":[0,9],"243":0,"248563":0,"24866":4,"24987":4,"25":[0,5,8,9,11],"25000":0,"256":[0,9],"26":9,"261":0,"265":0,"266":0,"267":0,"27":[1,3,9,11],"270":0,"2700406":[],"271":0,"272":0,"274":0,
"275":1,"27th":[0,3,10],"28":3,"280":0,"281":0,"282":0,"283":[0,1],"288":0,"289":0,"2971":0,"2nd":0,"2t":1,"2x5fcv":0,"3":[0,1,3,5,6,8,9,10,11],"30":[0,1,3,11],"300":[0,1,9],"305":0,"306":0,"312":0,"32":[0,6],"33":[0,5],"331":0,"333":0,"335":0,"337":0,"34":[0,3,11],"341":0,"346":1,"347":0,"350":0,"351":0,"357":1,"359":0,"361":0,"366":1,"372":0,"373":0,"376132":0,"3765":0,"3813":0,"3821":0,"383e":1,"387e":1,"392":0,"394":0,"399":0,"3f":[1,6],"3rd":0,"4":[0,1,3,4,5,8,11],"40":[0,3,4,11],"404333":0,"407":0,"41":[3,11],"412":0,"412e":1,"413":0,"414":0,"417":0,"41734":4,"42":[1,8],"421":0,"4259":0,"426e":1,"427":0,"430":0,"434":0,"435":1,"43676":4,"437":0,"44":0,"446":0,"45":[3,5,11],"452":0,"459":1,"4601":0,"461":0,"463":0,"465":0,"466":0,"470":0,"48":[3,11],"481":0,"48135":4,"486":0,"4898":0,"492":0,"496":0,"4960":1,"497":0,"5":[0,1,3,4,5,8,9,10,11],"50":[0,5,8,11],"500":[0,1,4,5,11],"5000":[1,5],"5005":4,"507":0,"508":0,"512":[9,11],"514":0,"515e":1,"530":0,"534":0,"535":0,"535e":1,"5379":4,"539":0,"541":1,"546":0,"5473":0,"54it":4,"55":5,"55it":4,"565":1,"569":0,"57":0,"573":0,"578":1,"583":0,"591":[3,11],"5f":4,"5fcv":11,"6":[0,1,3,5,8,10,11],"60":0,"600":1,"601":0,"604":[3,11],"606":0,"625":0,"627":0,"633e":1,"634":1,"64":[9,11],"640":0,"641":0,"650":0,"653":0,"654":1,"66":[1,11],"665":0,"667":0,"669":0,"67":5,"683":0,"688":0,"691":0,"694582":0,"7":[1,5,9],"70":0,"700":0,"701e":1,"711":0,"717":1,"725":1,"730":0,"735":0,"740e":1,"748":0,"75":[0,5,8],"762":0,"774":0,"778":0,"787":0,"794":0,"798":0,"8":[0,1,5,10,11],"8000":0,"830":0,"837":1,"858":1,"861":0,"87":[0,3,11],"8788":0,"889504":0,"8d2fhsgcvn0aaaaa":[],"9":[0,1,3,5,11],"90":[5,8],"901":0,"909":1,"914":1,"917":0,"919":0,"922":0,"923":0,"935":0,"936":0,"937":0,"945":1,"95":8,"9533":0,"958":0,"97":0,"979":0,"982":0,"99":8,"abstract":[3,9,11],"case":[0,1,3,4,5,8,11],"class":[0,1,3,4,5,6,8,9,10,11],"d\u00edez":[3,11],"default":[1,3,8,9,10],"do":[0,1,3,4,8,9],"final":[1,3,5],"float":[0,3,8,9,10,11],"function":[0,1,
3,4,5,6,7,9,11],"g\u00e1llego":[0,3,11],"gonz\u00e1lez":[3,11],"import":[0,1,3,4,5,6],"int":[0,5,8,10,11],"long":[4,9],"new":[0,3,10,11],"p\u00e9rez":[0,3,11],"return":[0,1,3,4,5,8,9,10,11],"rodr\u0131":[3,11],"short":9,"static":[3,11],"true":[0,1,3,4,5,6,8,9,10,11],"try":4,"while":[3,5,8,9,11],A:[0,3,8,9,10,11],As:[3,4],By:[1,3,8],For:[0,1,5,6,8,11],If:[3,5,8,11],In:[0,1,2,3,4,5,6,9,11],It:[3,4,5],One:[0,1,3,11],That:[1,4],The:[0,1,2,4,5,6,8,9,10,11],Then:3,These:0,To:[5,10],_:5,__:[],__class__:5,__name__:5,_adjust:[],_ae_:[],_classify_:11,_error_name_:11,_fit_learner_:11,_kld_:[],_labelledcollection_:11,_learner_:11,_mean:[],_min_df_:10,_my:[],_nkld_:[],_posterior_probabilities_:11,_q_:[],_rae_:[],_svmperf_:[],ab:[],aboud:3,about:[0,5],abov:[0,3,5],absolut:[1,3,5,6],absolute_error:8,abstractmethod:3,acc:[1,3,5,6,8,11],acc_error:8,accept:3,access:[0,3],accommod:0,accord:[1,3,4,8,9],accordingli:5,accuraci:[1,5],accuracy_polici:11,achiev:[1,3,4,5],acm:[0,3,10,11],across:[0,1,4,5,6],action:[0,11],acut:0,ad:6,add:[3,4,8],add_word:10,addit:3,addition:[0,11],adjust:[3,6,11],adjusted_quantif:8,adjustedclassifyandcount:11,adopt:[3,4],advanc:[0,6],advantag:3,ae:[1,2,5,8],ae_:1,affect:8,afterward:11,again:5,against:5,aggreg:[1,4,5,6,7,8],aggregativeprobabilisticquantifi:[3,11],aggregativequantifi:[3,11],aggregg:11,aim:[4,5],al:[0,2,9],alaiz:[3,11],alegr:[3,11],alejandro:4,alia:[3,11],all:[0,1,2,3,5,8,11],allia:3,alloc:9,allow:[0,1,2,3,5,8,9,10,11],almost:3,along:[0,3,11],alreadi:[3,11],also:[0,1,2,3,5,6,9],altern:4,although:[3,4,5,11],alwai:[3,4,5],among:3,an:[0,1,2,3,4,5,6,8,9,11],analys:[5,6],analysi:[0,3,6,10,11],analyz:5,ani:[0,1,3,4,5,6,8,9,10,11],anoth:[0,1,3,5],anyon:0,api:6,app:8,appeal:1,appear:5,append:5,appli:[2,3,4,5,8,9,10],appropri:4,approxim:[1,5,9,11],ar:[0,1,3,4,5,8,9,10,11],archive_filenam:8,archive_path:8,arg:[8,10,11],args_i:8,argu:4,argument:[0,1,3,5],arifici:8,aris:1,around:1,arrai:[1,3,5,8,9,10],articl:[3,4,11],artifici:[0,1,3,4,5,6,8],artificial_preva
lence_predict:8,artificial_prevalence_protocol:8,artificial_prevalence_report:8,artificial_prevalence_sampl:8,artificial_sampling_ev:[1,4],artificial_sampling_gener:[0,10],artificial_sampling_index_gener:10,artificial_sampling_predict:[1,5],artificial_sampling_report:1,arxiv:4,asarrai:1,asdataload:9,asonam:0,assess:4,assign:[3,8],associ:10,assum:[1,6,11],assumpt:[1,5,6],astyp:10,attempt:3,attribut:11,august:0,autom:[0,3,6],automat:[0,1],av:[3,11],avail:[0,1,2,3,5,6,9],averag:[1,3],avoid:1,axi:5,b:[0,10],balanc:[0,4],band:5,bar:8,barranquero:[2,3,9,11],base:[0,3,6,7,8,9],base_classifi:5,base_estim:3,base_quantifier_class:11,baseestim:[9,11],baselin:6,basequantifi:[3,8,11],basic:[5,11],batch:9,batch_siz:9,batch_size_test:9,been:[0,3,4,5,10,11],befor:[3,9,11],behav:[3,5],being:[4,8],belief:1,belong:3,below:[0,2,3,5,10],best:[4,8,9,11],best_model:8,best_model_:4,best_params_:4,better:4,between:[4,5,6,9],beyond:5,bia:6,bias:5,bidirect:11,bin:[5,11],bin_bia:5,bin_diag:5,binar:[8,10],binari:[3,5,6,9,10,11],binary_bias_bin:[5,8],binary_bias_glob:[5,8],binary_diagon:[5,8],binary_quantifi:11,binaryquantifi:11,block:0,bool:[8,11],both:5,bound:8,box:5,breast:0,brief:1,broken:5,budg:1,budget:[1,4],build:11,bypass:11,c:[3,4,9,10,11],calibr:3,calibratedclassifi:3,calibratedclassifiercv:3,calibratedcv:11,call:[0,1,5,8,11],callabl:[0,8,10],can:[0,1,2,3,4,5,8],cancer:0,cannot:11,cardiotocographi:0,care:11,carri:3,casa_token:[],castano:[3,11],castro:[3,11],categor:3,categori:1,cc:[3,5,11],ceil:8,center:5,chang:[0,1,3,11],character:[3,6],characteriz:[0,3,11],charg:[0,8],check:[3,4],checkpoint:[9,11],checkpointdir:11,checkpointnam:11,checkpointpath:9,choic:4,chosen:[4,8],cl:0,class2int:10,class_weight:4,classes_:[8,10,11],classif:[0,1,3,7,8,10,11],classif_posterior:[3,11],classif_predict:[3,11],classif_predictions_bin:11,classifi:[1,4,5,6,9,11],classifier_net:9,classifiermixin:9,classifyandcount:[3,11],classmethod:[0,10,11],classnam:10,clean_checkpoint:11,clean_checkpoint_dir:11,clear:5
,clearer:1,clearli:5,clip:8,close:1,closer:1,cmc:0,cnn:3,cnnnet:[3,9],code:[0,3,4,5,9],coincid:[0,6],col:[0,10],collect:[0,8,9,10],collet:10,color:[5,8],colormap:8,column:[0,10],com:[],combin:[0,1,4,8],combinatio:8,combinations_budget:8,come:0,commandlin:[],common:11,commonli:6,compar:[5,11],comparison:5,compil:[2,3],complet:[3,5],compon:9,compress:0,comput:[1,3,5,8,11],computation:4,compute_fpr:11,compute_t:11,compute_tpr:11,concept:6,concur:11,conduct:0,confer:[0,3,10],configur:[4,8],consecut:9,consid:[3,5,9,10],consist:[0,4,5,9,10],constrain:[1,5],constructor:3,consult:[0,1],contain:[1,2,3,5,8,9,10,11],contanin:8,content:7,context:8,contrast:1,control:[1,4],conv_block:[],conv_lay:[],convert:[1,3,9],convolut:9,copi:10,cornel:[],correct:11,correspond:[5,10],cost:1,costli:4,could:[0,1,3,4,5,6,11],count:[4,5,6,10,11],count_:[],counter:10,countvector:10,covari:10,cover:[1,4,9],coz:[0,3,11],cpu:[1,9],creat:[0,6,8],create_if_not_exist:8,create_parent_dir:8,crisp:3,criteria:4,cross:[3,11],cs:[],csr_matrix:10,csv:10,ctg:0,cuda:[3,9,11],cumbersom:1,curios:5,current:[3,8,9,10],custom:[3,6,8],customarili:[3,4],cv:[3,4],cyan:5,dat:[0,9],data:[1,3,4,5,6,7,8,9,11],data_hom:10,datafram:1,dataload:9,dataset:[1,3,4,5,6,7,8,9,11],dataset_nam:10,deal:0,decaesteck:[3,11],decai:9,decim:1,decis:[3,9],decision_funct:9,decomposit:9,dedic:1,deep:[3,8,11],def:[0,1,3,5,8],defin:[0,3,8,9,11],degre:4,del:[0,3,11],delai:8,deliv:3,dens:0,depend:[0,1,4,5,8],describ:[3,11],descript:0,design:4,desir:[0,1],despit:1,detail:[0,1,3,6,9,11],determin:[1,4,5],detriment:5,devel:10,develop:[4,6],deviat:[0,1,5],devic:[0,3,5,9,11],df:[1,10],df_replac:10,diabet:0,diagon:6,dict:[8,10,11],dictionari:[8,9],differ:[0,1,3,4,5,6,8,10],difficult:5,digit:0,dimens:[8,9,10],dimension:[8,9,10],directli:[0,1,3],directori:[2,9,10],discoveri:[3,11],discuss:5,disjoint:9,displai:[1,5],distanc:11,distant:[1,8],distribut:[0,3,5,8,11],diverg:[1,3],dl:[],doabl:0,doc_embed:11,doc_embedding_s:11,doc_posterior:11,document:[0,1,3,5,
9,10,11],document_embed:9,doe:[0,2,3,8],doi:[],done:3,dot:5,down:5,download:[0,2,3],download_fil:8,download_file_if_not_exist:8,drawn:[0,1,4],drift:6,drop:[9,11],drop_p:9,dropout:9,ds:[3,11],ds_polici:11,ds_policy_get_posterior:11,dtype:1,dump:10,dure:[1,5],dynam:[3,9,11],e:[0,1,3,4,5,6,8,9,10,11],eacc:11,each:[0,1,3,4,5,8,9,10,11],earli:9,early_stop:11,earlystop:8,easili:[0,2,5,9],ecc:11,edu:[],eemq:11,effect:3,effici:3,ehdi:11,either:[1,3,8,11],element:3,elm:[3,11],em:11,emb:9,embed:[3,9],embed_s:9,embedding_s:9,empti:10,emq:[5,11],enabl:9,encod:10,end:[4,8],endeavour:6,enough:5,ensembl:[0,6,11],ensemblefactori:11,ensure_probabilist:11,entir:[0,3,4,5],environ:[1,3,4,5,8],ep:[1,8],epacc:11,epoch:[9,11],epsilon:[1,11],equal:[1,8],equidist:[0,8],equip:[3,5],err:8,err_drift:5,err_nam:8,error:[3,4,6,7,9],error_:[],error_by_drift:[5,8],error_funct:1,error_metr:[1,4,8],error_nam:[5,8,11],establish:8,estim:[1,3,5,6,8,9,11],estim_prev:[1,5,8],estim_preval:[3,6],esuli:[0,2,3,9,10,11],et:[0,2,9],etc:6,eval_budget:[4,8],evalu:[0,3,4,5,6,7,9],eventu:9,everi:[3,11],everyth:3,evinc:5,ex:[],exact:0,exactli:0,exampl:[0,1,3,4,5,8,9,11],exce:8,excel:0,except:[3,8],exemplifi:0,exhibit:[4,5],exist:8,expand_frame_repr:1,expect:6,expectationmaximizationquantifi:[3,11],experi:[1,2,3,4,5,8],explain:[1,5],explicitlossminim:11,explicitlossminimis:11,explor:[4,8],express:10,ext:2,extend:[2,3,11],extens:[0,2,5],extern:3,extract:[1,8],f1:[1,9],f1_error:8,f1e:[1,8],f:[0,1,3,4,5,6,10,11],fabrizio:4,facilit:6,fact:[3,5],fals:[1,3,5,8,9,10,11],famili:3,familiar:3,far:9,fast:8,faster:[0,10],feat1:10,feat2:10,featn:10,featur:0,feature_extract:10,fetch:[0,6],fetch_review:[0,1,3,4,5,10],fetch_twitt:[0,3,6,10],fetch_ucidataset:[0,3,10],fetch_ucilabelledcollect:[0,10],ff_layer:11,fhe:0,file:[0,5,9,10],fin:0,find:[0,4],finish:4,first:[0,1,2,3,5,8,10,11],fit:[1,3,4,5,6,8,9,10,11],fit_learn:[3,11],fit_transform:10,fix:[1,4],float64:1,fold:[3,11],folder:0,follow:[0,1,3,4,5,6],fomart:10,for_model_select:[0,1
0],form:0,format:[0,5,10],former:[2,11],forward:[9,11],found:[0,3,4,9],four:3,fp:11,fpr:8,framework:6,frequenc:0,from:[0,1,3,4,5,6,8,10,11],from_csv:10,from_nam:[1,8],from_spars:10,from_text:10,full:1,fulli:0,func:8,further:[0,1,3,9],fusion:[0,3,11],futur:3,g:[0,1,3,4,6,8,10,11],gao:[0,3,10,11],gasp:[0,10],gen:8,gen_data:5,gen_fn:8,gen_prevalence_predict:8,gener:[0,1,3,4,5,8,9,10,11],generation_func:8,german:0,get:[0,1,5,8,9],get_aggregative_estim:11,get_nprevpoints_approxim:[1,8],get_param:[3,8,9,11],get_probability_distribut:11,get_quapy_hom:8,github:[],given:[1,3,4,9,11],goe:4,good:[4,5],got:4,govern:1,gpu:9,grant:11,grid:[4,8,11],gridsearchcv:4,gridsearchq:[4,8],group:3,guarante:11,guez:[3,11],gzip:0,ha:[3,4,5,9],haberman:[0,3],handl:0,happen:[4,5],hard:3,harder:5,harri:0,have:[0,1,2,3,4,5,10,11],hcr:[0,3,10],hdy:[6,11],held:[3,4,9],helling:11,hellingerdist:8,hellingerdistancei:[3,11],help:5,here:1,hidden:[5,9],hidden_s:9,hide:5,high:5,higher:[1,5],hlt:[],hold:6,home:10,hook:11,how:[0,1,3,4,5,11],howev:[0,4,5,11],hp:[0,3,4,10],html:[],http:[],hyper:[4,8,9],hyperparam:4,hyperparamet:[3,8,11],i:[0,1,3,4,5,8,9,10,11],id:[0,3,10],idf:0,ieee:0,ignor:[8,10,11],iid:[1,5,6],illustr:[3,4,5],imdb:[0,5,10],implement:[0,1,3,4,5,6,9,11],impos:4,improv:[3,9],includ:[0,1,3,5,6],inde:[3,4],index:[0,3,6,9,10],indextransform:10,indic:[0,1,3,4,5,8,10,11],individu:[1,3],infer:0,inform:[0,1,3,4,8,10,11],infrequ:10,inherit:3,init:3,init_hidden:11,initi:[0,9],inplac:[1,3,10],input:[3,5,8,9],insight:5,inspir:3,instal:[0,3,6,9],instanc:[0,3,4,5,6,8,9,10,11],instanti:[0,1,3,4,9],instead:[1,3,4,11],integ:[3,9,10],integr:6,interest:[1,5,6],interestingli:5,interfac:[0,1],intern:[0,3,10],interpret:[5,6],interv:[1,5,8],introduc:1,invok:[0,1,3,8,10],involv:[2,5],io:[],ionospher:0,iri:0,irrespect:5,isaggreg:11,isbinari:[8,10,11],isometr:5,isprobabilist:11,isti:[],item:8,iter:[0,8,11],its:[3,4,9],itself:[3,11],j:[0,3,11],joachim:[3,9],job:[2,8],joblib:2,just:[1,3],k:[3,6,11],kei:8,kept:10,kernel
:9,kernel_height:9,kfcv:[0,10,11],kindl:[0,1,3,5,10],kld:[1,2,8,9],know:3,knowledg:[0,3,10,11],known:[0,3,4],kullback:[1,3],kwarg:[9,10,11],l1:11,label:[0,3,4,5,6,8,9,10,11],labelledcollect:[0,3,4,8,10,11],larg:4,largest:8,last:[1,3,5,9],lastli:3,latex:5,latinn:[3,11],latter:11,layer:[3,9],lead:1,learn:[1,2,3,4,6,8,9,11],learner:[3,4,9,11],least:[0,10],leav:10,legend:8,leibler:[1,3],length:9,less:[8,10],let:[1,3],level:11,leverag:3,like:[0,1,3,5,9],limit:[5,8],line:[1,3],linear:5,linear_model:[1,3,4,6,9],linearsvc:[3,5],linspac:5,list:[0,5,8,9,10],listedcolormap:8,literatur:[0,1,4,6],load:[0,3,8,10],loader:0,loader_func:[0,10],local:8,log:10,logist:[1,3,9,11],logisticregress:[1,3,4,6,9],logscal:8,logspac:4,longer:8,longest:9,look:[0,1,3,5],loss:[6,9,11],low:[5,9],lower:[5,8],lower_is_bett:8,lowest:5,lowranklogisticregress:9,lr:[1,3,9,11],lstm:[3,9],lstm_class_nlay:9,lstm_hidden_s:11,lstm_nlayer:11,lstmnet:9,m:[3,8,11],machin:[1,4,6],made:[0,2,11],mae:[1,4,6,8,9,11],mae_loss:11,main:5,maintain:[3,11],make:[0,1,3],mammograph:0,manag:[0,3,10],mani:[1,3,4,5,6,11],manner:0,manual:0,map:[1,9],map_parallel:8,margin:9,matplotlib:[2,8],matric:[0,5,10],matrix:5,max:11,max_it:11,max_sample_s:11,maxim:6,maximum:[1,8,9],maximumlikelihoodprevalenceestim:11,md:[],mean:[0,1,3,4,5,6,9,10,11],mean_absolute_error:8,mean_relative_absolute_error:8,measur:[2,3,4,5,6,11],mediansweep2:11,mediansweep:11,member:3,memori:9,mention:3,merg:5,meta:[6,7,8],method:[0,1,4,5,6,7,8],method_data:5,method_nam:[5,8],metric:[1,3,4,6,8],might:1,min_df:[1,3,4,5,10],min_po:11,mine:[0,3,11],minim:8,minimum:10,minimun:10,mining6:10,mixtur:3,mkld:[1,8,11],mnkld:[1,8,11],mock:9,modal:4,model:[0,1,5,6,8,9,11],model_select:[4,7],modifi:[3,8],modul:[0,1,3,5,6,7],moment:[0,3],more:[3,5,8],moreo:[0,3,4,10],most:[0,3,5,6,11],movi:0,mrae:[1,6,8,9,11],ms2:11,ms:11,mse:[1,3,6,8,11],msg:11,multiprocess:8,multivari:[3,9,11],must:3,my:[],my_arrai:8,my_custom_load:0,my_data:0,mycustomloss:3,n:[0,1,8,9],n_bin:[5,8],n_class:[
1,3,8,9,10,11],n_compon:9,n_dimens:9,n_epoch:11,n_featur:9,n_instanc:9,n_job:[1,3,4,8,10,11],n_preval:[0,8,10],n_prevpoint:[1,4,5,8],n_repeat:[1,8],n_repetit:[1,4,5,8],n_sampl:9,name:[5,8,9,10],nativ:6,natur:[1,8],natural_prevalence_predict:8,natural_prevalence_protocol:8,natural_prevalence_report:8,natural_sampling_gener:10,natural_sampling_index_gener:10,nbin:[5,8],ndarrai:[1,3,8,10,11],necessarili:11,need:[0,3,11],neg:[0,5],nest:[],net:9,network:[0,9,10,11],neural:[0,7,8,10],neuralclassifiertrain:[3,9],neutral:0,next:[4,8,9],nfold:[0,10],nkld:[1,2,6,8,9],nn:[9,11],nogap:10,non:[3,11],non_aggreg:[7,8],none:[1,4,8,9,10,11],nonetheless:4,nor:3,normal:[0,1,3,11],normalize_preval:8,note:[1,3,4,5],now:5,nowadai:3,np:[1,3,4,5,8],npp:8,nprevpoint:8,nrepeat:[0,10],num_prevalence_combin:[1,8],number:[0,1,3,5,8,9,10,11],numer:[0,1,3,6,10],numpi:[2,4,8,9,11],o_l6x_pcf09mdetq4tu7jk98mxfbgsxp9zso14jkuiyudgfg0:[],object:[0,8,9,10,11],observ:1,obtain:[1,4],occur:[5,10],occurr:10,octob:[0,3],off:9,offer:[3,6],older:2,omd:[0,10],ommit:1,onc:[1,3,5,8],one:[0,1,3,4,5,8,11],ones:[1,3,5,8,10],onevsal:[3,11],onli:[0,3,5,8,9,11],open:[0,6],oper:3,opt:4,optim:[2,3,4,8,9,11],optimize_threshold:11,option:[0,1,3,5,8,10,11],order:[0,2,3,5,8,10,11],order_bi:11,org:[],orient:[3,6,8,11],origin:[0,3,10,11],os:0,other:[1,3,5,6,8],otherwis:[0,3,11],our:[],out:[3,4,5,9],outcom:5,outer:8,output:[0,1,3,4,9,11],over:[3,4],overal:1,overestim:5,overrid:3,overridden:[3,11],own:4,p:[0,3,8,11],p_hat:8,pacc:[1,3,5,11],packag:[0,2,3,6,7],pad:9,pad_length:9,padding_length:9,page:[0,2,6],pageblock:0,pair:0,panda:[1,2],paper:[0,3,11],parallel:[1,3,8],param:[4,8,9,10,11],param_grid:[4,8,11],param_mod_sel:11,param_model_sel:11,paramet:[1,3,4,8,9,10,11],part:[3,10],particular:[0,1,3],particularli:1,pass:[0,1,5,9,11],past:1,patch:[2,3,9],path:[0,3,5,8,9,10],patienc:[8,9,11],pattern:[3,11],pca:9,pcalr:[],pcc:[3,4,5,11],pd:1,pdf:5,peopl:[],perf:[6,9],perform:[1,3,4,5,6,8,9,11],phonem:0,pick:4,pickl:[3,8,10],pickle_pa
th:8,pickled_resourc:8,pii:[],pip:2,pipelin:[],pkl:8,plai:0,plan:3,pleas:3,plot:[6,7],png:5,point:[0,1,3,8],polici:[3,11],popular:6,portion:4,pos_class:[8,10],posit:[0,3,5],possibl:[1,3,8],posterior:[3,8,9,11],posterior_prob:[3,11],postpon:3,potter:0,pp:[0,3],practic:[0,4],pre:[0,3],prec:[0,8],precis:[0,1],preclassifi:3,predict:[3,4,5,8,9,11],predict_proba:[3,9,11],predictor:1,prepare_svmperf:[2,3],preprint:4,preprocess:[0,1,3,7,8],present:[0,3,10],preserv:[1,5],pretti:5,prev:[0,1,8,10],prevail:3,preval:[0,1,3,4,5,6,8,10,11],prevalence_estim:8,prevalence_from_label:8,prevalence_from_prob:8,prevalence_linspac:8,prevel:11,previou:3,previous:11,prevs_estim:11,prevs_hat:[1,8],princip:9,print:[0,1,3,4,6,9],prior:[1,3,4,5,6],priori:[3,11],probabilist:[3,11],probabilisticadjustedclassifyandcount:11,probabilisticclassifyandcount:11,probabl:[1,3,4,5,6,9,11],problem:[0,3,5,11],procedur:[3,6,11],proceed:[0,3,10],process:[3,4,8],processor:3,procol:1,produc:[0,1,5,8],product:3,progress:8,properli:0,properti:[3,8,9,10,11],proport:[3,4,8,9,11],propos:[2,3,11],protocl:8,protocol:[0,3,4,5,6,8],provid:[0,3,5,6],ptecondestim:11,ptr:[3,11],ptr_polici:11,purpos:[0,11],python:[0,6],pytorch:2,q:[0,2,3,8,9],qacc:9,qdrop_p:11,qf1:9,qgm:9,qp:[0,1,3,4,5,6,8],quanet:[2,6,9,11],quanetmodul:11,quanettrain:11,quantif:[0,1,6,8,9,10,11],quantifi:[3,4,5,6,8,11],quantification_error:8,quantiti:8,quapi:[0,1,2,3,4,5],quapy_data:0,quay_data:10,quevedo:[0,3,11],quick:[],r:[0,3,11],rae:[1,2,8],rais:[3,8],rand:8,random:[1,3,4,5,8],random_se:[1,8],random_st:10,randomli:0,rang:[0,5],rank:[3,9],rare:10,rate:[3,9],rather:[1,4],raw:10,rb:0,re:[3,4,10],read:10,reader:[7,8],readm:[],real:[9,10],reason:[3,5,6],receiv:[0,3,5],recip:11,recognit:[3,11],recommend:[1,5],recurr:[0,3,10],red:0,red_siz:[3,11],reduc:[0,10],reduce_column:[0,10],refer:[9,10],refit:[4,8],regard:4,regist:11,regress:9,regressor:[1,3,11],reindex_label:10,reiniti:9,rel:[1,3],relative_absolute_error:8,reli:[1,3],reliabl:[3,11],rememb:5,remov:10,re
peat:[8,10],repetit:8,repl:10,replac:[0,3,10],replic:[1,4,8],report:1,repositori:0,repr_siz:9,repres:[1,3,5,10,11],represent:[0,3],request:[0,8,11],requir:[0,1,3,6,9],reset_net_param:9,resourc:8,respect:[0,1,5,11],respond:3,rest:[10,11],result:[1,2,3,4,5,6,11],retain:[0,3,9],retrain:4,return_constrained_dim:8,reus:[0,3,8],review:[5,6,10],reviews_sentiment_dataset:0,rewrit:5,right:4,role:0,root:6,roughli:0,routin:8,row:10,run:[0,1,2,3,4,5,8,11],s003132031400291x:[],s:[0,1,3,4,5,8,9,10],saeren:[3,11],sai:11,said:3,same:[0,3,5,10],sampl:[0,1,3,4,5,6,8,9,10,11],sample_s:[0,1,3,4,5,8,10,11],sampling_from_index:[0,10],sampling_index:[0,10],sander:[0,10],save:[5,8],save_or_show:8,save_text_fil:8,savepath:[5,8],scall:10,scenario:[1,3,4,5,6],scienc:[3,11],sciencedirect:[],scikit:[2,3,4],scipi:[2,10],score:[0,1,4,9,10],script:[1,2,3,6],se:[1,8],search:[3,4,6,8,11],sebastiani:[0,3,4,10,11],second:[0,1,3,5,8],section:4,see:[0,1,2,3,4,5,6,9],seed:[1,4,8],seem:3,seemingli:5,seen:5,select:[0,3,6,8,11],selector:3,self:[3,9,10,11],semeion:0,semev:0,semeval13:[0,10],semeval14:[0,10],semeval15:[0,10],semeval16:[0,6,10],sentenc:10,sentiment:[3,6,10,11],separ:[8,10],seri:0,serv:3,set:[0,1,3,4,5,6,8,9,10,11],set_opt:1,set_param:[3,8,9,11],set_siz:[],sever:0,sh:[2,3],shape:[5,8,9],share:[0,10],shift:[1,4,6,8],shorter:9,shoud:3,should:[0,1,3,4,5,6,9,10,11],show:[0,1,3,4,5,8,9,10],show_std:[5,8],showcas:5,shown:[1,5],shuffl:[9,10],signific:1,silent:[8,11],similar:11,simpl:[0,3,5,11],simplest:3,simplex:[0,8],simpli:[1,2,3,4,5,6,11],sinc:[0,1,3,5,8,11],singl:[1,3,6,11],size:[0,1,3,8,9,10,11],sklearn:[1,3,4,5,6,9,10,11],sld:3,slice:8,smooth:[1,8],smooth_limits_epsilon:8,so:[0,1,3,5,8,9,11],social:[0,3,10,11],soft:3,softwar:0,solid:5,solv:4,solve_adjust:11,some:[0,1,3,5],some_arrai:8,sometim:1,sonar:0,sourc:[2,3,6,9],sout:11,space:[0,4,9],spambas:0,spars:[0,10],special:[0,5,10],specif:[3,4],specifi:[0,1,3,5,8,9,10,11],spectf:0,spectrum:[0,1,4,5],speed:3,split:[0,3,4,5,9,10,11],split_stratifi:10
,splitstratifi:10,spmatrix:10,squar:[1,3],sst:[0,10],stabil:1,standard:[0,1,5,10],start:4,stat:10,state:8,statist:[0,1,11],stats_siz:11,std:9,stdout:8,step:[5,8],stop:9,store:[0,9,10],str:[0,8,10],strategi:[3,4],stratifi:[0,3],stride:9,string:[1,8,10],strongli:[4,5],strprev:[0,1,8],structur:3,studi:[0,3,11],subclass:11,subinterv:5,sublinear_tf:10,submit:0,submodul:7,subobject:[],suboptim:4,subpackag:7,subsequ:[10,11],subtract:[0,8],subtyp:10,suffic:5,suffici:11,sum:11,summar:0,supervis:[4,6],support:[3,6,9],surpass:1,svm:[3,5,6,9],svm_light:[],svm_perf:[],svm_perf_classifi:9,svm_perf_learn:9,svm_perf_quantif:[2,3],svmae:[3,11],svmkld:[3,11],svmnkld:[3,11],svmperf:[2,3,7,8],svmperf_bas:[9,11],svmperf_hom:3,svmq:[3,11],svmrae:[3,11],syntax:5,system:4,t50:11,t:[0,1,3],take:[0,3,5,8,11],taken:[3,9],target:[3,5,6,8,9,11],task:[3,4,11],temp_se:8,tempor:8,tend:5,tendenc:5,tensor:9,term:[0,1,3,4,5,6,9,10,11],test:[0,1,3,4,5,6,8,9,10,11],test_bas:[],test_dataset:[],test_method:[],test_path:[0,10],test_sampl:8,test_split:10,text2tfidf:[0,1,3,10],text:[0,3,8,9,10,11],textclassifiernet:9,textual:[0,6,10],tf:[0,10],tfidf:[0,4,5,10],tfidfvector:10,than:[1,4,5,8,9,10],thei:[0,3],them:[0,3,11],theoret:4,thereaft:1,thi:[0,1,2,3,4,5,6,8,9,11],thing:3,third:[1,5],thorsten:9,those:[1,3,4,5,8,9],though:3,three:[0,5],thresholdoptim:11,through:[3,8],thu:[3,4,5,11],tictacto:0,time:[0,1,3,8,10],timeout:8,timeouterror:8,timer:8,titl:8,tj:[],tn:11,token:[0,9,10],tool:[1,6],top:[3,11],torch:[3,9,11],torchdataset:9,toward:5,tp:11,tpr:8,tqdm:2,tr_iter_per_poch:11,tr_prev:[5,8,11],trade:9,tradition:1,train:[0,1,3,4,5,6,8,9,10,11],train_path:[0,10],train_prev:[5,8],train_prop:10,train_siz:10,train_val_split:11,trainer:9,training_help:11,training_preval:5,training_s:5,transact:[3,11],transform:[0,9,10],transfus:0,trivial:3,true_prev:[1,5,8],true_preval:6,truncatedsvd:9,turn:4,tweet:[0,3,10,11],twitter:[6,10],twitter_sentiment_datasets_test:0,twitter_sentiment_datasets_train:0,two:[0,1,3,4,5,8],type
:[0,3],typic:[1,4,5,9],uci:6,unabl:0,unadjust:5,unbias:5,uncompress:0,under:1,underestim:5,unfortun:5,unifi:0,uniform_prevalence_sampl:8,uniform_sampl:10,uniform_sampling_index:10,uniform_simplex_sampl:8,uniformli:8,union:[8,11],uniqu:10,unit:0,unix:0,unk:10,unless:11,unlik:[1,4],unus:[8,9,11],up:[3,4,8,9,11],updat:[],url:8,us:[0,1,3,4,5,6,8,9,10,11],user:[0,1,5],utf:10,util:[7,9],v:[3,11],va_iter_per_poch:11,val:[0,10],val_split:[3,4,8,9,11],valid:[0,1,3,4,5,8,9,10,11],valid_loss:[3,9],valid_polici:11,valu:[0,1,3,8,9,10,11],variabl:[1,3,5,8],varianc:[0,5],variant:[5,6,11],varieti:4,variou:[1,5],vector:[0,9,10],verbos:[0,1,4,8,9,10,11],veri:[3,5],versatil:6,version:[2,9],vertical_xtick:8,via:[0,2,3,11],view:5,visual:[5,6],vocab_s:9,vocabulari:[9,10],vocabulary_s:[3,9,10],vs:3,w:[0,3,10,11],wa:[0,3,5,10,11],wai:[1,11],wait:9,want:[3,4],warn:10,wb:[0,10],wdbc:0,we:[0,1,3,4,5,6],weight:[9,10],weight_decai:9,well:[0,3,4,5],were:0,what:3,when:[0,1,3,4,5,8,9],whenev:[5,8],where:[3,5,8,9,10,11],wherebi:4,whether:[8,9,10,11],which:[0,1,3,4,5,8,9,10,11],white:0,whole:[0,1,3,4,8],why:3,wide:5,wiki:[0,3],wine:0,within:[8,11],without:[1,3,8],word:[1,3,6,9,10],work:[1,3,4,5],worker:1,wors:[4,5],would:[0,1,3,5,6,8,11],wrapper:[8,9],written:6,www:[],x:[5,8,9,10,11],xavier:9,xavier_uniform:9,xlrd:[0,2],xy:10,y:[5,9,10,11],y_:11,y_pred:8,y_true:8,ye:10,yeast:0,yield:[5,8],you:[2,3],your:3,z:0,zero:0,zfthyovrzwxmgfzylqw_y8cagg:[],zip:[0,5]},titles:["Datasets","Evaluation","Installation","Quantification Methods","Model Selection","Plotting","Welcome to QuaPy\u2019s documentation!","quapy","quapy package","quapy.classification package","quapy.data package","quapy.method 
package"],titleterms:{"function":8,A:6,The:3,ad:0,aggreg:[3,11],base:[10,11],bia:5,classif:[4,9],classifi:3,content:[6,8,9,10,11],count:3,custom:0,data:[0,10],dataset:[0,10],diagon:5,distanc:3,document:6,drift:5,emq:3,ensembl:3,error:[1,5,8],evalu:[1,8],ex:[],exampl:6,expect:3,explicit:3,featur:6,get:[],hdy:3,helling:3,indic:6,instal:2,introduct:6,issu:0,learn:0,loss:[2,3,4],machin:0,maxim:3,measur:1,meta:[3,11],method:[3,9,11],minim:3,model:[3,4],model_select:8,modul:[8,9,10,11],network:3,neural:[3,9,11],non_aggreg:11,orient:[2,4],packag:[8,9,10,11],perf:2,plot:[5,8],preprocess:10,process:0,protocol:1,quanet:3,quantif:[2,3,4,5],quapi:[6,7,8,9,10,11],quick:6,reader:10,readm:[],requir:2,review:0,s:6,select:4,sentiment:0,start:[],submodul:[8,9,10,11],subpackag:8,svm:2,svmperf:9,tabl:6,target:4,test:[],test_bas:[],test_dataset:[],test_method:[],titl:[],twitter:0,uci:0,util:8,variant:3,welcom:6,y:3}})
\ No newline at end of file
diff --git a/quapy/classification/methods.py b/quapy/classification/methods.py
index b313f57..01f4654 100644
--- a/quapy/classification/methods.py
+++ b/quapy/classification/methods.py
@@ -3,10 +3,18 @@ from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
-class PCALR(BaseEstimator):
+class LowRankLogisticRegression(BaseEstimator):
"""
- An example of a classification method that also generates embedded inputs, as those required for QuaNet.
- This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).
+ An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)
+ that also generates embedded inputs (i.e., that implements `transform`), as those required for
+ :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating
+ :class:`quapy.method.neural.QuaNet` on array-like real-valued instances.
+ The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`
+ while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.
+
+ :param n_components: the number of principal components to retain
+ :param kwargs: parameters for the
+ `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier
"""
def __init__(self, n_components=100, **kwargs):
@@ -14,35 +22,76 @@ class PCALR(BaseEstimator):
self.learner = LogisticRegression(**kwargs)
def get_params(self):
+ """
+ Get hyper-parameters for this estimator.
+
+ :return: a dictionary with parameter names mapped to their values
+ """
params = {'n_components': self.n_components}
params.update(self.learner.get_params())
return params
def set_params(self, **params):
- if 'n_components' in params:
- self.n_components = params['n_components']
- del params['n_components']
- self.learner.set_params(**params)
+ """
+ Set the parameters of this estimator.
+
+ :param params: a `**kwargs` dictionary with the estimator parameters for
+ `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__
+ and, optionally, `n_components` for `TruncatedSVD`
+ """
+ params_ = dict(params)
+ if 'n_components' in params_:
+ self.n_components = params_['n_components']
+ del params_['n_components']
+ self.learner.set_params(**params_)
def fit(self, X, y):
- self.learner.fit(X, y)
+ """
+ Fit the model according to the given training data. The fit consists of
+ fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.
+
+ :param X: array-like of shape `(n_samples, n_features)` with the instances
+ :param y: array-like of shape `(n_samples,)` with the class labels
+ :return: `self`
+ """
nF = X.shape[1]
self.pca = None
if nF > self.n_components:
- self.pca = TruncatedSVD(self.n_components).fit(X, y)
+ self.pca = TruncatedSVD(self.n_components).fit(X)
+ X = self.transform(X)
+ self.learner.fit(X, y)
self.classes_ = self.learner.classes_
return self
-
def predict(self, X):
- # X = self.transform(X)
+ """
+ Predicts labels for the instances `X` embedded into the low-rank space.
+
+ :param X: array-like of shape `(n_samples, n_features)` instances to classify
+ :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+ instances in `X`
+ """
+ X = self.transform(X)
return self.learner.predict(X)
def predict_proba(self, X):
- # X = self.transform(X)
+ """
+ Predicts posterior probabilities for the instances `X` embedded into the low-rank space.
+
+ :param X: array-like of shape `(n_samples, n_features)` instances to classify
+ :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+ """
+ X = self.transform(X)
return self.learner.predict_proba(X)
def transform(self, X):
+ """
+ Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if
+ `n_components` >= `X.shape[1]`.
+
+ :param X: array-like of shape `(n_samples, n_features)` instances to embed
+ :return: array-like of shape `(n_samples, n_components)` with the embedded instances
+ """
if self.pca is None:
return X
return self.pca.transform(X)
diff --git a/quapy/classification/neural.py b/quapy/classification/neural.py
index 7823165..0d576c5 100644
--- a/quapy/classification/neural.py
+++ b/quapy/classification/neural.py
@@ -16,6 +16,22 @@ from quapy.util import EarlyStop
class NeuralClassifierTrainer:
+ """
+ Trains a neural network for text classification.
+
+ :param net: an instance of `TextClassifierNet` implementing the forward pass
+ :param lr: learning rate (default 1e-3)
+ :param weight_decay: weight decay (default 0)
+ :param patience: number of epochs that do not show any improvement in validation
+ to wait before applying early stop (default 10)
+ :param epochs: maximum number of training epochs (default 200)
+ :param batch_size: batch size for training (default 64)
+ :param batch_size_test: batch size for test (default 512)
+ :param padding_length: maximum number of tokens to consider in a document (default 300)
+ :param device: specify 'cpu' (default) or 'cuda' for enabling gpu
+ :param checkpointpath: where to store the parameters of the best model found so far
+ according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat')
+ """
def __init__(self,
net: 'TextClassifierNet',
@@ -45,23 +61,36 @@ class NeuralClassifierTrainer:
'device': torch.device(device)
}
self.learner_hyperparams = self.net.get_params()
-
self.checkpointpath = checkpointpath
self.classes_ = np.asarray([0, 1])
print(f'[NeuralNetwork running on {device}]')
-
os.makedirs(Path(checkpointpath).parent, exist_ok=True)
def reset_net_params(self, vocab_size, n_classes):
+ """Reinitialize the network parameters
+
+ :param vocab_size: the size of the vocabulary
+ :param n_classes: the number of target classes
+ """
self.net = self.net.__class__(vocab_size, n_classes, **self.learner_hyperparams)
self.net = self.net.to(self.trainer_hyperparams['device'])
self.net.xavier_uniform()
def get_params(self):
+ """Get hyper-parameters for this estimator
+
+ :return: a dictionary with parameter names mapped to their values
+ """
return {**self.net.get_params(), **self.trainer_hyperparams}
def set_params(self, **params):
+ """Set the parameters of this trainer and the learner it is training.
+ In this current version, parameter names for the trainer and learner should
+ be disjoint.
+
+ :param params: a `**kwargs` dictionary with the parameters
+ """
trainer_hyperparams = self.trainer_hyperparams
learner_hyperparams = self.net.get_params()
for key, val in params.items():
@@ -81,6 +110,10 @@ class NeuralClassifierTrainer:
@property
def device(self):
+ """ Gets the device in which the network is allocated
+
+ :return: device
+ """
return next(self.net.parameters()).device
def _train_epoch(self, data, status, pbar, epoch):
@@ -132,6 +165,14 @@ class NeuralClassifierTrainer:
f'macroF1={100 * self.status["va"]["f1"]:.2f}%')
def fit(self, instances, labels, val_split=0.3):
+ """
+ Fits the model according to the given training data.
+
+ :param instances: list of lists of indexed tokens
+ :param labels: array-like of shape `(n_samples, n_classes)` with the class labels
+ :param val_split: proportion of training documents to be taken as the validation set (default 0.3)
+ :return: `self`
+ """
train, val = LabelledCollection(instances, labels).split_stratified(1-val_split)
opt = self.trainer_hyperparams
checkpoint = self.checkpointpath
@@ -169,9 +210,22 @@ class NeuralClassifierTrainer:
return self
def predict(self, instances):
+ """
+ Predicts labels for the instances
+
+ :param instances: list of lists of indexed tokens
+ :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+ instances in `instances`
+ """
return np.argmax(self.predict_proba(instances), axis=-1)
def predict_proba(self, instances):
+ """
+ Predicts posterior probabilities for the instances
+
+ :param instances: list of lists of indexed tokens
+ :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+ """
self.net.eval()
opt = self.trainer_hyperparams
with torch.no_grad():
@@ -182,6 +236,13 @@ class NeuralClassifierTrainer:
return np.concatenate(positive_probs)
def transform(self, instances):
+ """
+ Returns the embeddings of the instances
+
+ :param instances: list of lists of indexed tokens
+ :return: array-like of shape `(n_samples, embed_size)` with the embedded instances,
+ where `embed_size` is defined by the classification network
+ """
self.net.eval()
embeddings = []
opt = self.trainer_hyperparams
@@ -193,6 +254,12 @@ class NeuralClassifierTrainer:
class TorchDataset(torch.utils.data.Dataset):
+ """
+ Wraps labelled instances as a Torch dataset that can be converted into a :class:`torch.utils.data.DataLoader` object
+
+ :param instances: list of lists of indexed tokens
+ :param labels: array-like of shape `(n_samples, n_classes)` with the class labels
+ """
def __init__(self, instances, labels=None):
self.instances = instances
@@ -205,6 +272,18 @@ class TorchDataset(torch.utils.data.Dataset):
return {'doc': self.instances[index], 'label': self.labels[index] if self.labels is not None else None}
def asDataloader(self, batch_size, shuffle, pad_length, device):
+ """
+ Converts the labelled collection into a Torch DataLoader with dynamic padding for
+ the batch
+
+ :param batch_size: batch size
+ :param shuffle: whether or not to shuffle instances
+ :param pad_length: the maximum length for the list of tokens (dynamic padding is
+ applied, meaning that if the longest document in the batch is shorter than
+ `pad_length`, then the batch is padded up to its length, and not to `pad_length`)
+ :param device: whether to allocate tensors in cpu or in cuda
+ :return: a :class:`torch.utils.data.DataLoader` object
+ """
def collate(batch):
data = [torch.LongTensor(item['doc'][:pad_length]) for item in batch]
data = pad_sequence(data, batch_first=True, padding_value=qp.environ['PAD_INDEX']).to(device)
@@ -220,37 +299,97 @@ class TorchDataset(torch.utils.data.Dataset):
class TextClassifierNet(torch.nn.Module, metaclass=ABCMeta):
+ """
+ Abstract Text classifier (`torch.nn.Module`)
+ """
@abstractmethod
- def document_embedding(self, x): ...
+ def document_embedding(self, x):
+ """Embeds documents (i.e., performs the forward pass up to the
+ next-to-last layer).
+
+ :param x: a batch of instances, typically generated by a torch's `DataLoader`
+ instance (see :class:`quapy.classification.neural.TorchDataset`)
+ :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+ `n_samples` is the number of documents, and `n_dimensions` is the
+ dimensionality of the embedding
+ """
+ ...
def forward(self, x):
+ """Performs the forward pass.
+
+ :param x: a batch of instances, typically generated by a torch's `DataLoader`
+ instance (see :class:`quapy.classification.neural.TorchDataset`)
+ :return: a tensor of shape `(n_instances, n_classes)` with the decision scores
+ for each of the instances and classes
+ """
doc_embedded = self.document_embedding(x)
return self.output(doc_embedded)
def dimensions(self):
+ """Gets the number of dimensions of the embedding space
+
+ :return: integer
+ """
return self.dim
def predict_proba(self, x):
+ """
+ Predicts posterior probabilities for the instances in `x`
+
+ :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`
+ where `n_instances` is the number of instances in the batch, and `pad_length`
+ is the length of the padded sequences in the batch
+ :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities
+ """
logits = self(x)
return torch.softmax(logits, dim=1).detach().cpu().numpy()
def xavier_uniform(self):
+ """
+ Performs Xavier initialization of the network parameters
+ """
for p in self.parameters():
if p.dim() > 1 and p.requires_grad:
torch.nn.init.xavier_uniform_(p)
@abstractmethod
- def get_params(self): ...
+ def get_params(self):
+ """
+ Get hyper-parameters for this estimator
+
+ :return: a dictionary with parameter names mapped to their values
+ """
+ ...
@property
- def vocabulary_size(self): ...
+ def vocabulary_size(self):
+ """
+ Return the size of the vocabulary
+
+ :return: integer
+ """
+ ...
class LSTMnet(TextClassifierNet):
+ """
+ An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
+ Long Short Term Memory networks.
+
+ :param vocabulary_size: the size of the vocabulary
+ :param n_classes: number of target classes
+ :param embedding_size: the dimensionality of the word embeddings space (default 100)
+ :param hidden_size: the dimensionality of the hidden space (default 256)
+ :param repr_size: the dimensionality of the document embeddings space (default 100)
+ :param lstm_class_nlayers: number of LSTM layers (default 1)
+ :param drop_p: drop probability for dropout (default 0.5)
+ """
def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1,
drop_p=0.5):
+
super().__init__()
self.vocabulary_size_ = vocabulary_size
self.n_classes = n_classes
@@ -270,7 +409,7 @@ class LSTMnet(TextClassifierNet):
self.doc_embedder = torch.nn.Linear(hidden_size, self.dim)
self.output = torch.nn.Linear(self.dim, n_classes)
- def init_hidden(self, set_size):
+ def __init_hidden(self, set_size):
opt = self.hyperparams
var_hidden = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
var_cell = torch.zeros(opt['lstm_class_nlayers'], set_size, opt['hidden_size'])
@@ -279,21 +418,55 @@ class LSTMnet(TextClassifierNet):
return var_hidden, var_cell
def document_embedding(self, x):
+ """Embeds documents (i.e., performs the forward pass up to the
+ next-to-last layer).
+
+ :param x: a batch of instances, typically generated by a torch's `DataLoader`
+ instance (see :class:`quapy.classification.neural.TorchDataset`)
+ :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+ `n_samples` is the number of documents, and `n_dimensions` is the
+ dimensionality of the embedding
+ """
embedded = self.word_embedding(x)
- rnn_output, rnn_hidden = self.lstm(embedded, self.init_hidden(x.size()[0]))
+ rnn_output, rnn_hidden = self.lstm(embedded, self.__init_hidden(x.size()[0]))
abstracted = self.dropout(F.relu(rnn_hidden[0][-1]))
abstracted = self.doc_embedder(abstracted)
return abstracted
def get_params(self):
+ """
+ Get hyper-parameters for this estimator
+
+ :return: a dictionary with parameter names mapped to their values
+ """
return self.hyperparams
@property
def vocabulary_size(self):
+ """
+ Return the size of the vocabulary
+
+ :return: integer
+ """
return self.vocabulary_size_
class CNNnet(TextClassifierNet):
+ """
+ An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on
+ Convolutional Neural Networks.
+
+ :param vocabulary_size: the size of the vocabulary
+ :param n_classes: number of target classes
+ :param embedding_size: the dimensionality of the word embeddings space (default 100)
+ :param hidden_size: the dimensionality of the hidden space (default 256)
+ :param repr_size: the dimensionality of the document embeddings space (default 100)
+ :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of
+ consecutive tokens that each kernel covers
+ :param stride: convolutional stride (default 1)
+ :param padding: convolutional padding (default 0)
+ :param drop_p: drop probability for dropout (default 0.5)
+ """
def __init__(self, vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100,
kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5):
@@ -320,19 +493,28 @@ class CNNnet(TextClassifierNet):
self.doc_embedder = torch.nn.Linear(len(kernel_heights) * hidden_size, self.dim)
self.output = nn.Linear(self.dim, n_classes)
- def conv_block(self, input, conv_layer):
+ def __conv_block(self, input, conv_layer):
conv_out = conv_layer(input) # conv_out.size() = (batch_size, out_channels, dim, 1)
activation = F.relu(conv_out.squeeze(3)) # activation.size() = (batch_size, out_channels, dim1)
max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2) # maxpool_out.size() = (batch_size, out_channels)
return max_out
def document_embedding(self, input):
+ """Embeds documents (i.e., performs the forward pass up to the
+ next-to-last layer).
+
+ :param input: a batch of instances, typically generated by a torch's `DataLoader`
+ instance (see :class:`quapy.classification.neural.TorchDataset`)
+ :return: a torch tensor of shape `(n_samples, n_dimensions)`, where
+ `n_samples` is the number of documents, and `n_dimensions` is the
+ dimensionality of the embedding
+ """
input = self.word_embedding(input)
input = input.unsqueeze(1) # input.size() = (batch_size, 1, num_seq, embedding_length)
- max_out1 = self.conv_block(input, self.conv1)
- max_out2 = self.conv_block(input, self.conv2)
- max_out3 = self.conv_block(input, self.conv3)
+ max_out1 = self.__conv_block(input, self.conv1)
+ max_out2 = self.__conv_block(input, self.conv2)
+ max_out3 = self.__conv_block(input, self.conv3)
all_out = torch.cat((max_out1, max_out2, max_out3), 1) # all_out.size() = (batch_size, num_kernels*out_channels)
abstracted = self.dropout(F.relu(all_out)) # (batch_size, num_kernels*out_channels)
@@ -340,10 +522,20 @@ class CNNnet(TextClassifierNet):
return abstracted
def get_params(self):
+ """
+ Get hyper-parameters for this estimator
+
+ :return: a dictionary with parameter names mapped to their values
+ """
return self.hyperparams
@property
def vocabulary_size(self):
+ """
+ Return the size of the vocabulary
+
+ :return: integer
+ """
return self.vocabulary_size_
diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py
index b5a4e85..2f6ad90 100644
--- a/quapy/classification/svmperf.py
+++ b/quapy/classification/svmperf.py
@@ -1,17 +1,29 @@
import random
import subprocess
-import tempfile
from os import remove, makedirs
from os.path import join, exists
from subprocess import PIPE, STDOUT
-import shutil
-
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import dump_svmlight_file
class SVMperf(BaseEstimator, ClassifierMixin):
+ """A wrapper for the `SVM-perf package <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`__ by Thorsten Joachims.
+ When using losses for quantification, the source code has to be patched. See
+ the `installation documentation `__
+ for further details.
+
+ References:
+
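+ * `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`__
+ * `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`__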
+
+ :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`
+ :param C: trade-off between training error and margin (default 0.01)
+ :param verbose: set to True to print svm-perf std outputs
+ :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
+ """
# losses with their respective codes in svm_perf implementation
valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
@@ -24,10 +36,22 @@ class SVMperf(BaseEstimator, ClassifierMixin):
self.loss = loss
def set_params(self, **parameters):
+ """
+ Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported
+
+ :param parameters: a `**kwargs` dictionary `{'C': <float>}`
+ """
assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
self.C = parameters['C']
def fit(self, X, y):
+ """
+ Trains the SVM for the multivariate performance loss
+
+ :param X: training instances
+ :param y: a binary vector of labels
+ :return: `self`
+ """
assert self.loss in SVMperf.valid_losses, \
f'unsupported loss {self.loss}, valid ones are {list(SVMperf.valid_losses.keys())}'
@@ -68,11 +92,24 @@ class SVMperf(BaseEstimator, ClassifierMixin):
return self
def predict(self, X):
+ """
+ Predicts labels for the instances `X`
+ :param X: array-like of shape `(n_samples, n_features)` instances to classify
+ :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of
+ instances in `X`
+ """
confidence_scores = self.decision_function(X)
predictions = (confidence_scores > 0) * 1
return predictions
def decision_function(self, X, y=None):
+ """
+ Evaluate the decision function for the samples in `X`.
+
+ :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify
+ :param y: unused
+ :return: array-like of shape `(n_samples,)` containing the decision scores of the instances
+ """
assert hasattr(self, 'tmpdir'), 'predict called before fit'
assert self.tmpdir is not None, 'model directory corrupted'
assert exists(self.model), 'model not found'
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 35f87b9..f4ff185 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -91,8 +91,8 @@ class GridSearchQ(BaseQuantifier):
if self.protocol=='npp' and (self.eval_budget is None or self.eval_budget <= 0):
raise ValueError(f'when protocol="npp" the parameter eval_budget should be '
f'indicated (and should be >0).')
- if self.n_prevpoints != 1:
- print('[warning] n_prevpoints has been set and will be ignored for the selected protocol')
+ if self.n_repetitions != 1:
+ print('[warning] n_repetitions has been set and will be ignored for the selected protocol')
def _sout(self, msg):
if self.verbose:
@@ -165,7 +165,6 @@ class GridSearchQ(BaseQuantifier):
params_values = list(self.param_grid.values())
model = self.model
- n_jobs = self.n_jobs
if self.timeout > 0:
def handler(signum, frame):
@@ -174,7 +173,6 @@ class GridSearchQ(BaseQuantifier):
signal.signal(signal.SIGALRM, handler)
- self._sout(f'starting optimization with n_jobs={n_jobs}')
self.param_scores_ = {}
self.best_score_ = None
some_timeouts = False
diff --git a/quapy/util.py b/quapy/util.py
index 96c5835..9eafdfa 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -83,6 +83,7 @@ def download_file_if_not_exists(url, archive_path):
def create_if_not_exist(path):
os.makedirs(path, exist_ok=True)
+ return path
def get_quapy_home():