From 020530e14f26785836d0bd9c6344b66692bad6a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Fri, 15 Mar 2024 16:52:19 +0100 Subject: [PATCH 1/8] Add example for Bayesian quantification. --- examples/bayesian_quantification.py | 189 ++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 examples/bayesian_quantification.py diff --git a/examples/bayesian_quantification.py b/examples/bayesian_quantification.py new file mode 100644 index 0000000..fda97cd --- /dev/null +++ b/examples/bayesian_quantification.py @@ -0,0 +1,189 @@ +""" +This example shows how to use Bayesian quantification (https://arxiv.org/abs/2302.09159), +which is suitable for low-data situations and when the uncertainty of the prevalence estimate is of interest. + +For this, we will need to install extra dependencies: + +``` +$ pip install quapy[bayesian] +``` + +Running the script via: + +``` +$ python examples/bayesian_quantification.py +``` + +will produce a plot `bayesian_quantification.pdf`. + +Due to a low sample size and the fact that classes 2 and 3 are hard to distinguish, +it is hard to estimate the proportions accurately, what is visible by looking at the posterior samples, +showing large uncertainty. +""" +from dataclasses import dataclass + +import numpy as np +import matplotlib.pyplot as plt + +from sklearn.ensemble import RandomForestClassifier + +from quapy.method.aggregative import BayesianCC, ACC, PACC +from quapy.data import LabelledCollection + +FIGURE_PATH = "bayesian_quantification.pdf" + + +@dataclass +class SimulatedData: + n_classes: int + X_train: np.ndarray + Y_train: np.ndarray + X_test: np.ndarray + Y_test: np.ndarray + + +def simulate_data(rng) -> SimulatedData: + """Generates a simulated data set with three classes.""" + cov = np.eye(2) + + n_train = [400, 400, 400] + n_test = [40, 25, 15] + + mus = [np.zeros(2), np.array([1, 1.5]), np.array([1.5, 1])] + + X_train = np.concatenate([ + rng.multivariate_normal(mus[i], cov, size=n_train[i]) + for i in range(3) + ]) + + X_test = np.concatenate([ + rng.multivariate_normal(mus[i], cov, size=n_test[i]) + for i in range(3) + ]) + + Y_train = np.concatenate([[i] * n for i, n in enumerate(n_train)]) + Y_test = np.concatenate([[i] * n for i, n in enumerate(n_test)]) + + return SimulatedData( + n_classes=3, + X_train=X_train, + X_test=X_test, + Y_train=Y_train, + Y_test=Y_test, + ) + + +def plot_simulated_data(axs, data: SimulatedData) -> None: + """Plots a simulated data set. + + Args: + axs: a list of three `plt.Axes` objects, on which the samples will be plotted. + data: the simulated data set. 
+ """ + xlim = ( + -0.3 + min(data.X_train[:, 0].min(), data.X_test[:, 0].min()), + 0.3 + max(data.X_train[:, 0].max(), data.X_test[:, 0].max()) + ) + ylim = ( + -0.3 + min(data.X_train[:, 1].min(), data.X_test[:, 1].min()), + 0.3 + max(data.X_train[:, 1].max(), data.X_test[:, 1].max()) + ) + + for ax in axs: + ax.set_xlabel("$X_1$") + ax.set_ylabel("$X_2$") + ax.set_aspect("equal") + ax.set_xlim(*xlim) + ax.set_ylim(*ylim) + + ax = axs[0] + ax.set_title("Training set") + for i in range(data.n_classes): + ax.scatter(data.X_train[data.Y_train == i, 0], data.X_train[data.Y_train == i, 1], c=f"C{i}", s=3, rasterized=True) + + ax = axs[1] + ax.set_title("Test set\n(with labels)") + for i in range(data.n_classes): + ax.scatter(data.X_test[data.Y_test == i, 0], data.X_test[data.Y_test == i, 1], c=f"C{i}", s=3, rasterized=True) + + ax = axs[2] + ax.set_title("Test set\n(as observed)") + ax.scatter(data.X_test[:, 0], data.X_test[:, 1], c="C5", s=3, rasterized=True) + +def get_random_forest() -> RandomForestClassifier: + return RandomForestClassifier(n_estimators=10, random_state=5) + +def train_and_plot_bayesian_quantification(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: + quantifier = BayesianCC(classifier=get_random_forest()) + quantifier.fit(training) + + # Obtain mean prediction + mean_prediction = quantifier.quantify(test) + x_ax = np.arange(n_classes) + ax.plot(x_ax, mean_prediction, c="salmon", linewidth=2, linestyle=":", label="Bayesian") + + # Obtain individual samples + samples = quantifier.get_prevalence_samples() + for sample in samples[::5, :]: + ax.plot(x_ax, sample, c="salmon", alpha=0.1, linewidth=0.3, rasterized=True) + + +def _get_estimate(estimator_class, training: LabelledCollection, test: np.ndarray) -> None: + estimator = estimator_class(get_random_forest()) + estimator.fit(training) + return estimator.quantify(test) + +def train_and_plot_acc(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: + estimate = _get_estimate(ACC, training, test) + ax.plot(np.arange(n_classes), estimate, c="darkblue", linewidth=2, linestyle=":", label="ACC") + + +def train_and_plot_pacc(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: + estimate = _get_estimate(PACC, training, test) + ax.plot(np.arange(n_classes), estimate, c="limegreen", linewidth=2, linestyle=":", label="PACC") + + +def plot_true_proportions(ax: plt.Axes, test_labels: np.ndarray, n_classes: int) -> None: + counts = np.bincount(test_labels, minlength=n_classes) + proportion = counts / counts.sum() + + x_ax = np.arange(n_classes) + ax.plot(x_ax, proportion, c="black", linewidth=2, label="True") + + ax.set_xlabel("Class") + ax.set_ylabel("Prevalence") + ax.set_xticks(x_ax, x_ax + 1) + ax.set_yticks([0, 0.25, 0.5, 0.75, 1.0]) + ax.set_xlim(-0.1, n_classes - 0.9) + ax.set_ylim(-0.01, 1.01) + + + +def main() -> None: + # --- Simulate data --- + rng = np.random.default_rng(42) + data = simulate_data(rng) + + # --- Plot simulated data --- + fig, axs = plt.subplots(1, 4, figsize=(13, 3), dpi=300) + for ax in axs: + ax.spines[['top', 'right']].set_visible(False) + plot_simulated_data(axs[:3], data) + + # --- Plot quantification results --- + ax = axs[3] + plot_true_proportions(ax, test_labels=data.Y_test, n_classes=data.n_classes) + + training = LabelledCollection(data.X_train, data.Y_train) + train_and_plot_acc(ax, training=training, test=data.X_test, n_classes=data.n_classes) + train_and_plot_pacc(ax, training=training, 
test=data.X_test, n_classes=data.n_classes) + train_and_plot_bayesian_quantification(ax=ax, training=training, test=data.X_test, n_classes=data.n_classes) + + ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', frameon=False) + + fig.tight_layout() + fig.savefig(FIGURE_PATH) + + +if __name__ == '__main__': + main() From 4dd66b192136ed8284d0e7a7649523948296294b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Fri, 15 Mar 2024 17:06:20 +0100 Subject: [PATCH 2/8] Add projection onto the probability simplex --- quapy/functional.py | 55 +++++++++++++++++++++++++++++++++++-- quapy/method/aggregative.py | 3 +- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/quapy/functional.py b/quapy/functional.py index 3a4ebfa..1459a0f 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -1,6 +1,6 @@ import itertools from collections import defaultdict -from typing import Union, Callable +from typing import Literal, Union, Callable import scipy import numpy as np @@ -374,4 +374,55 @@ def linear_search(loss, n_classes): if min_score is None or score < min_score: prev_selected, min_score = prev, score - return np.asarray([1 - prev_selected, prev_selected]) \ No newline at end of file + return np.asarray([1 - prev_selected, prev_selected]) + + +def _project_onto_probability_simplex(v: np.ndarray) -> np.ndarray: + """Projects a point onto the probability simplex. + + The code is adapted from Mathieu Blondel's BSD-licensed + `implementation `_ + which is accompanying the paper + + Mathieu Blondel, Akinori Fujino, and Naonori Ueda. + Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, + ICPR 2014, `URL `_ + + :param v: point in n-dimensional space, shape `(n,)` + :return: projection of `v` onto (n-1)-dimensional probability simplex, shape `(n,)` + """ + v = np.asarray(v) + n = len(v) + + # Sort the values in the descending order + u = np.sort(v)[::-1] + + cssv = np.cumsum(u) - 1.0 + ind = np.arange(1, n + 1) + cond = u - cssv / ind > 0 + rho = ind[cond][-1] + theta = cssv[cond][-1] / float(rho) + return np.maximum(v - theta, 0) + + + +def clip_prevalence(p: np.ndarray, method: Literal[None, "none", "clip", "project"]) -> np.ndarray: + """ + Clips the proportions vector `p` so that it is a valid probability distribution. + + :param p: the proportions vector to be clipped, shape `(n_classes,)` + :param method: the method to use for normalization. + If `None` or `"none"`, no normalization is performed. + If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. + If `"project"`, the values are projected onto the probability simplex. + :return: the normalized prevalence vector, shape `(n_classes,)` + """ + if method is None or method == "none": + return p + elif method == "clip": + adjusted = np.clip(p, 0, 1) + return adjusted / adjusted.sum() + elif method == "project": + return _project_onto_probability_simplex(p) + else: + raise ValueError(f"Method {method} not known.") diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index da98358..5ea0473 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -443,8 +443,7 @@ class ACC(AggregativeCrispQuantifier): try: adjusted_prevs = np.linalg.solve(A, B) - adjusted_prevs = np.clip(adjusted_prevs, 0, 1) - adjusted_prevs /= adjusted_prevs.sum() + adjusted_prevs = F.clip_prevalence(adjusted_prevs, method="clip") except np.linalg.LinAlgError: adjusted_prevs = prevs_estim # no way to adjust them! 
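The two normalization strategies added in this patch behave quite differently on the same input, which is easiest to see on a concrete vector. A minimal sketch, using `quapy.functional.clip_prevalence` exactly as introduced above (the input is a made-up, out-of-simplex estimate of the kind the ACC adjustment can produce):

```python
import numpy as np
from quapy.functional import clip_prevalence

# a hypothetical adjusted estimate that fell outside the probability simplex
p = np.array([-0.10, 0.35, 0.75])

# "clip": truncate to [0, 1], then renormalize so the entries sum to 1
print(clip_prevalence(p, method="clip"))     # approx. [0.0, 0.3182, 0.6818]

# "project": Euclidean projection onto the simplex, i.e. the closest valid
# distribution in L2 distance (a constant threshold is subtracted instead)
print(clip_prevalence(p, method="project"))  # [0.0, 0.30, 0.70]

# None / "none": return the vector unchanged
print(clip_prevalence(p, method=None))       # [-0.10, 0.35, 0.75]
```

Note the design difference: clipping rescales the surviving entries multiplicatively, whereas the projection subtracts the constant `theta` computed in `_project_onto_probability_simplex` and truncates at zero, which is what makes the result the Euclidean-nearest point of the simplex.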
From d34b086a767147c81e7db9312fd206c7936a9cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Fri, 15 Mar 2024 17:58:23 +0100 Subject: [PATCH 3/8] Refactor solving routine --- quapy/functional.py | 64 +++++++++++++++++++++++++++++++++++++ quapy/method/aggregative.py | 28 ++++------------ 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/quapy/functional.py b/quapy/functional.py index 1459a0f..eb4485e 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -1,4 +1,5 @@ import itertools +import warnings from collections import defaultdict from typing import Literal, Union, Callable @@ -426,3 +427,66 @@ def clip_prevalence(p: np.ndarray, method: Literal[None, "none", "clip", "projec return _project_onto_probability_simplex(p) else: raise ValueError(f"Method {method} not known.") + + +def solve_adjustment( + p_c_y: np.ndarray, + p_c: np.ndarray, + method: Literal["inversion", "invariant-ratio"], + solver: Literal["exact", "minimize", "exact-raise", "exact-cc"], +) -> np.ndarray: + """ + Function finding the prevalence vector by adjusting + the classifier predictions. + + :param p_c_y: array of shape `(n_classes, n_classes,)` with entry `(c,y)` being the estimate + of :math:`P(C=c|Y=y)`, that is, the probability that an instance that belongs to class :math:`y` + ends up being classified as belonging to class :math:`c` + :param p_c: classifier predictions, where the entry `c` is the estimate of :math:`P(C=c)`. Shape `(n_classes,)` + :param method: adjustment method to be used: + 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, + which tries to invert `P(C|Y)` matrix. + 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, + which replaces the last equation with the normalization condition. + :param solver: the method to use for solving the system of linear equations. Valid options are: + 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has + rank strictly less than `n_classes`. + 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds + to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + 'exact': deprecated, defaults to 'exact-cc' + 'minimize': minimizes a loss, so the solution always exists + """ + if solver == "exact": + warnings.warn("The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2) + solver = "exact-cc" + + A = np.array(p_c_y, dtype=float) + B = np.array(p_c, dtype=float) + + if method == "inversion": + pass # We leave A and B unchanged + elif method == "invariant-ratio": + # Change the last set of equations + raise NotImplementedError + else: + raise ValueError(f"Flavour {method} not known.") + + + if solver == "minimize": + def loss(prev): + return np.linalg.norm(A @ prev - B) + return optim_minimize(loss, n_classes=A.shape[0]) + else: + # Solvers based on matrix inversion, so we use try/except block + try: + return np.linalg.solve(A, B) + except np.linalg.LinAlgError: + # The matrix is not invertible. 
+ # Depending on the solver, we either raise an error + # or return the classifier predictions without adjustment + if solver == "exact-raise": + raise + elif solver == "exact-cc": + return p_c + else: + raise ValueError(f"Solver {solver} not known.") diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 5ea0473..77a4eaf 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -435,27 +435,13 @@ class ACC(AggregativeCrispQuantifier): :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates """ - A = PteCondEstim - B = prevs_estim - - if solver == 'exact': - # attempts an exact solution of the linear system (may fail) - - try: - adjusted_prevs = np.linalg.solve(A, B) - adjusted_prevs = F.clip_prevalence(adjusted_prevs, method="clip") - except np.linalg.LinAlgError: - adjusted_prevs = prevs_estim # no way to adjust them! - - return adjusted_prevs - - elif solver == 'minimize': - # poses the problem as an optimization one, and tries to minimize the norm of the differences - - def loss(prev): - return np.linalg.norm(A @ prev - B) - - return F.optim_minimize(loss, n_classes=A.shape[0]) + estimate = F.solve_adjustment( + p_c_y=PteCondEstim, + p_c=prevs_estim, + solver=solver, + method='inversion', + ) + return F.clip_prevalence(estimate, method="clip") class PCC(AggregativeSoftQuantifier): From 5cdd158fcc778b690e5f64bec7744d4a42931a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Fri, 15 Mar 2024 18:14:42 +0100 Subject: [PATCH 4/8] Add invariant ratio estimators. --- quapy/functional.py | 9 +-- quapy/method/aggregative.py | 138 +++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 55 deletions(-) diff --git a/quapy/functional.py b/quapy/functional.py index eb4485e..84acdbc 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -466,11 +466,12 @@ def solve_adjustment( if method == "inversion": pass # We leave A and B unchanged elif method == "invariant-ratio": - # Change the last set of equations - raise NotImplementedError + # Change the last equation to replace + # it with the normalization condition + A[-1, :] = 1.0 + B[-1] = 1.0 else: - raise ValueError(f"Flavour {method} not known.") - + raise ValueError(f"Method {method} not known.") if solver == "minimize": def loss(prev): diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 77a4eaf..3b44491 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from copy import deepcopy -from typing import Callable, Union +from typing import Callable, Literal, Union import numpy as np from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling from scipy import optimize @@ -367,28 +367,50 @@ class ACC(AggregativeCrispQuantifier): Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated. :param n_jobs: number of parallel workers - :param solver: indicates the method to be used for obtaining the final estimates. The choice - 'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a - matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in - binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. 
This solution - might not exist for degenerated classifiers, in which case the method defaults to classify and count - (i.e., does not attempt any adjustment). - Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter - is achieved by indicating solver='minimize'. This one generally works better, and is the default parameter. - More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and - Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications - (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. + :param method: adjustment method to be used: + 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, + which tries to invert `P(C|Y)` matrix. + 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, + which replaces the last equation with the normalization condition. + :param solver: the method to use for solving the system of linear equations. Valid options are: + 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has + rank strictly less than `n_classes`. + 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds + to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + 'exact': deprecated, defaults to 'exact-cc' + 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter. + More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and + Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications + (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. + :param clipping: the method to use for normalization. + If `None` or `"none"`, no normalization is performed. + If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. + If `"project"`, the values are projected onto the probability simplex. 
""" - def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'): + def __init__( + self, + classifier: BaseEstimator, + val_split=5, + n_jobs=None, + solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', + method: Literal['inversion', 'invariant-ratio'] = 'inversion', + clipping: Literal['clip', 'none', 'project'] = 'clip', + ) -> None: self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) self.solver = solver + self.method = method + self.clipping = clipping def _check_init_parameters(self): - if self.solver not in ['exact', 'minimize']: - raise ValueError("unknown solver; valid ones are 'exact', 'minimize'") + if self.solver not in ['exact', 'minimize', 'exact-raise', 'exact-cc']: + raise ValueError("unknown solver; valid ones are 'exact', 'minimize', 'exact-raise', 'exact-cc'") + if self.method not in ['inversion', 'invariant-ratio']: + raise ValueError("unknown method; valid ones are 'inversion', 'invariant-ratio'") + if self.clipping not in ['clip', 'none', 'project', None]: + raise ValueError("unknown clipping; valid ones are 'clip', 'none', 'project' or None") def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ @@ -418,30 +440,13 @@ class ACC(AggregativeCrispQuantifier): def aggregate(self, classif_predictions): prevs_estim = self.cc.aggregate(classif_predictions) - return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver) - - @classmethod - def solve_adjustment(cls, PteCondEstim, prevs_estim, solver='exact'): - """ - Solves the system linear system :math:`Ax = B` with :math:`A` = `PteCondEstim` and :math:`B` = `prevs_estim` - - :param PteCondEstim: a `np.ndarray` of shape `(n_classes,n_classes,)` with entry `(i,j)` being the estimate - of :math:`P(y_i|y_j)`, that is, the probability that an instance that belongs to :math:`y_j` ends up being - classified as belonging to :math:`y_i` - :param prevs_estim: a `np.ndarray` of shape `(n_classes,)` with the class prevalence estimates - :param solver: indicates the method to use for solving the system of linear equations. Valid options are - 'exact' (tries to solve the system --may fail if the misclassificatin matrix has rank < n_classes) or - 'optim_minimize' (minimizes a norm --always exists). - :return: an adjusted `np.ndarray` of shape `(n_classes,)` with the corrected class prevalence estimates - """ - estimate = F.solve_adjustment( - p_c_y=PteCondEstim, + p_c_y=self.Pte_cond_estim_, p_c=prevs_estim, - solver=solver, - method='inversion', + solver=self.solver, + method=self.method, ) - return F.clip_prevalence(estimate, method="clip") + return F.clip_prevalence(estimate, method=self.clipping) class PCC(AggregativeSoftQuantifier): @@ -481,28 +486,51 @@ class PACC(AggregativeSoftQuantifier): for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated. :param n_jobs: number of parallel workers - :param solver: indicates the method to be used for obtaining the final estimates. The choice - 'exact' comes down to solving the system of linear equations :math:`Ax=B` where `A` is a - matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in - binary) and `B` is the vector of prevalence values estimated via CC, as :math:`x=A^{-1}B`. 
This solution - might not exist for degenerated classifiers, in which case the method defaults to classify and count - (i.e., does not attempt any adjustment). - Another option is to search for the prevalence vector that minimizes the L2 norm of :math:`|Ax-B|`. The latter - is achieved by indicating solver='minimize'. This one generally works better, and is the default parameter. - More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and - Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications - (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. - + :param method: adjustment method to be used: + 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, + which tries to invert `P(C|Y)` matrix. + 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, + which replaces the last equation with the normalization condition. + :param solver: the method to use for solving the system of linear equations. Valid options are: + 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has + rank strictly less than `n_classes`. + 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds + to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + 'exact': deprecated, defaults to 'exact-cc' + 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter. + More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and + Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications + (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. + :param clipping: the method to use for normalization. + If `None` or `"none"`, no normalization is performed. + If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. + If `"project"`, the values are projected onto the probability simplex. 
""" - def __init__(self, classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize'): + def __init__( + self, + classifier: BaseEstimator, + val_split=5, + n_jobs=None, + solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', + method: Literal['inversion', 'invariant-ratio'] = 'inversion', + clipping: Literal['clip', 'none', 'project'] = 'clip', + ) -> None: self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) + self.solver = solver + self.method = method + self.clipping = clipping def _check_init_parameters(self): - assert self.solver in ['exact', 'minimize'], "unknown solver; valid ones are 'exact', 'minimize'" + if self.solver not in ['exact', 'minimize', 'exact-raise', 'exact-cc']: + raise ValueError("unknown solver; valid ones are 'exact', 'minimize', 'exact-raise', 'exact-cc'") + if self.method not in ['inversion', 'invariant-ratio']: + raise ValueError("unknown method; valid ones are 'inversion', 'invariant-ratio'") + if self.clipping not in ['clip', 'none', 'project', None]: + raise ValueError("unknown clipping; valid ones are 'clip', 'none', 'project' or None") def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ @@ -518,7 +546,15 @@ class PACC(AggregativeSoftQuantifier): def aggregate(self, classif_posteriors): prevs_estim = self.pcc.aggregate(classif_posteriors) - return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim, solver=self.solver) + + estimate = F.solve_adjustment( + p_c_y=self.Pte_cond_estim_, + p_c=prevs_estim, + solver=self.solver, + method=self.method, + ) + return F.clip_prevalence(estimate, method=self.clipping) + @classmethod def getPteCondEstim(cls, classes, y, y_): From 2db7cf20bde2ef1b9a48ef4669718b0e720dc4cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Czy=C5=BC?= Date: Sat, 16 Mar 2024 12:14:42 +0100 Subject: [PATCH 5/8] Improve the plot, add more comments. 
--- examples/bayesian_quantification.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/bayesian_quantification.py b/examples/bayesian_quantification.py index fda97cd..3bca084 100644 --- a/examples/bayesian_quantification.py +++ b/examples/bayesian_quantification.py @@ -35,6 +35,7 @@ FIGURE_PATH = "bayesian_quantification.pdf" @dataclass class SimulatedData: + """Auxiliary class to keep the training and test data sets.""" n_classes: int X_train: np.ndarray Y_train: np.ndarray @@ -44,13 +45,16 @@ class SimulatedData: def simulate_data(rng) -> SimulatedData: """Generates a simulated data set with three classes.""" - cov = np.eye(2) + # Number of examples of each class in both data sets n_train = [400, 400, 400] n_test = [40, 25, 15] + # Mean vectors and shared covariance of P(X|Y) distributions mus = [np.zeros(2), np.array([1, 1.5]), np.array([1.5, 1])] + cov = np.eye(2) + # Generate the features accordingly X_train = np.concatenate([ rng.multivariate_normal(mus[i], cov, size=n_train[i]) for i in range(3) @@ -95,6 +99,8 @@ def plot_simulated_data(axs, data: SimulatedData) -> None: ax.set_aspect("equal") ax.set_xlim(*xlim) ax.set_ylim(*ylim) + ax.set_xticks([]) + ax.set_yticks([]) ax = axs[0] ax.set_title("Training set") @@ -110,10 +116,14 @@ def plot_simulated_data(axs, data: SimulatedData) -> None: ax.set_title("Test set\n(as observed)") ax.scatter(data.X_test[:, 0], data.X_test[:, 1], c="C5", s=3, rasterized=True) + def get_random_forest() -> RandomForestClassifier: + """An auxiliary factory method to generate a random forest.""" return RandomForestClassifier(n_estimators=10, random_state=5) + def train_and_plot_bayesian_quantification(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: + """Fits Bayesian quantification and plots posterior mean as well as individual samples""" quantifier = BayesianCC(classifier=get_random_forest()) quantifier.fit(training) @@ -129,10 +139,12 @@ def train_and_plot_bayesian_quantification(ax: plt.Axes, training: LabelledColle def _get_estimate(estimator_class, training: LabelledCollection, test: np.ndarray) -> None: + """Auxiliary method for running ACC and PACC.""" estimator = estimator_class(get_random_forest()) estimator.fit(training) return estimator.quantify(test) + def train_and_plot_acc(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: estimate = _get_estimate(ACC, training, test) ax.plot(np.arange(n_classes), estimate, c="darkblue", linewidth=2, linestyle=":", label="ACC") @@ -144,6 +156,7 @@ def train_and_plot_pacc(ax: plt.Axes, training: LabelledCollection, test: np.nda def plot_true_proportions(ax: plt.Axes, test_labels: np.ndarray, n_classes: int) -> None: + """Plots the true proportions.""" counts = np.bincount(test_labels, minlength=n_classes) proportion = counts / counts.sum() From 6ca89d0e555ce6fcd286575fc0e8ab32ddb72149 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 18 Mar 2024 11:36:27 +0100 Subject: [PATCH 6/8] small refactoring to reuse labelled collections and dataset classes instead of new dataclasses specific to it --- examples/bayesian_quantification.py | 178 +++++++++++++--------------- 1 file changed, 84 insertions(+), 94 deletions(-) diff --git a/examples/bayesian_quantification.py b/examples/bayesian_quantification.py index 3bca084..06c928c 100644 --- a/examples/bayesian_quantification.py +++ b/examples/bayesian_quantification.py @@ -20,30 +20,21 @@ Due to a low sample size and the fact that classes 2 and 3 
are hard to distingui it is hard to estimate the proportions accurately, what is visible by looking at the posterior samples, showing large uncertainty. """ -from dataclasses import dataclass import numpy as np import matplotlib.pyplot as plt +import quapy as qp from sklearn.ensemble import RandomForestClassifier from quapy.method.aggregative import BayesianCC, ACC, PACC -from quapy.data import LabelledCollection +from quapy.data import LabelledCollection, Dataset + FIGURE_PATH = "bayesian_quantification.pdf" -@dataclass -class SimulatedData: - """Auxiliary class to keep the training and test data sets.""" - n_classes: int - X_train: np.ndarray - Y_train: np.ndarray - X_test: np.ndarray - Y_test: np.ndarray - - -def simulate_data(rng) -> SimulatedData: +def simulate_data(rng) -> Dataset: """Generates a simulated data set with three classes.""" # Number of examples of each class in both data sets @@ -53,44 +44,33 @@ def simulate_data(rng) -> SimulatedData: # Mean vectors and shared covariance of P(X|Y) distributions mus = [np.zeros(2), np.array([1, 1.5]), np.array([1.5, 1])] cov = np.eye(2) - + + def gen_Xy(centers, sizes): + X = np.concatenate([rng.multivariate_normal(mu_i, cov, size_i) for mu_i, size_i in zip(centers, sizes)]) + y = np.concatenate([[i] * n for i, n in enumerate(sizes)]) + return X, y + # Generate the features accordingly - X_train = np.concatenate([ - rng.multivariate_normal(mus[i], cov, size=n_train[i]) - for i in range(3) - ]) + train = LabelledCollection(*gen_Xy(centers=mus, sizes=n_train)) + test = LabelledCollection(*gen_Xy(centers=mus, sizes=n_test)) - X_test = np.concatenate([ - rng.multivariate_normal(mus[i], cov, size=n_test[i]) - for i in range(3) - ]) - - Y_train = np.concatenate([[i] * n for i, n in enumerate(n_train)]) - Y_test = np.concatenate([[i] * n for i, n in enumerate(n_test)]) - - return SimulatedData( - n_classes=3, - X_train=X_train, - X_test=X_test, - Y_train=Y_train, - Y_test=Y_test, - ) + return Dataset(training=train, test=test) -def plot_simulated_data(axs, data: SimulatedData) -> None: +def plot_simulated_data(axs, data: Dataset) -> None: """Plots a simulated data set. - - Args: - axs: a list of three `plt.Axes` objects, on which the samples will be plotted. - data: the simulated data set. + + :param axs: a list of three `plt.Axes` objects, on which the samples will be plotted. + :param data: the simulated data set. 
""" + train, test = data.train_test xlim = ( - -0.3 + min(data.X_train[:, 0].min(), data.X_test[:, 0].min()), - 0.3 + max(data.X_train[:, 0].max(), data.X_test[:, 0].max()) + -0.3 + min(train.X[:, 0].min(), test.X[:, 0].min()), + 0.3 + max(train.X[:, 0].max(), test.X[:, 0].max()) ) ylim = ( - -0.3 + min(data.X_train[:, 1].min(), data.X_test[:, 1].min()), - 0.3 + max(data.X_train[:, 1].max(), data.X_test[:, 1].max()) + -0.3 + min(train.X[:, 1].min(), test.X[:, 1].min()), + 0.3 + max(train.X[:, 1].max(), test.X[:, 1].max()) ) for ax in axs: @@ -105,63 +85,23 @@ def plot_simulated_data(axs, data: SimulatedData) -> None: ax = axs[0] ax.set_title("Training set") for i in range(data.n_classes): - ax.scatter(data.X_train[data.Y_train == i, 0], data.X_train[data.Y_train == i, 1], c=f"C{i}", s=3, rasterized=True) + ax.scatter(train.X[train.y == i, 0], train.X[train.y == i, 1], c=f"C{i}", s=3, rasterized=True) ax = axs[1] ax.set_title("Test set\n(with labels)") for i in range(data.n_classes): - ax.scatter(data.X_test[data.Y_test == i, 0], data.X_test[data.Y_test == i, 1], c=f"C{i}", s=3, rasterized=True) + ax.scatter(test.X[test.y == i, 0], test.X[test.y == i, 1], c=f"C{i}", s=3, rasterized=True) ax = axs[2] ax.set_title("Test set\n(as observed)") - ax.scatter(data.X_test[:, 0], data.X_test[:, 1], c="C5", s=3, rasterized=True) + ax.scatter(test.X[:, 0], test.X[:, 1], c="C5", s=3, rasterized=True) -def get_random_forest() -> RandomForestClassifier: - """An auxiliary factory method to generate a random forest.""" - return RandomForestClassifier(n_estimators=10, random_state=5) - - -def train_and_plot_bayesian_quantification(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: - """Fits Bayesian quantification and plots posterior mean as well as individual samples""" - quantifier = BayesianCC(classifier=get_random_forest()) - quantifier.fit(training) - - # Obtain mean prediction - mean_prediction = quantifier.quantify(test) - x_ax = np.arange(n_classes) - ax.plot(x_ax, mean_prediction, c="salmon", linewidth=2, linestyle=":", label="Bayesian") - - # Obtain individual samples - samples = quantifier.get_prevalence_samples() - for sample in samples[::5, :]: - ax.plot(x_ax, sample, c="salmon", alpha=0.1, linewidth=0.3, rasterized=True) - - -def _get_estimate(estimator_class, training: LabelledCollection, test: np.ndarray) -> None: - """Auxiliary method for running ACC and PACC.""" - estimator = estimator_class(get_random_forest()) - estimator.fit(training) - return estimator.quantify(test) - - -def train_and_plot_acc(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: - estimate = _get_estimate(ACC, training, test) - ax.plot(np.arange(n_classes), estimate, c="darkblue", linewidth=2, linestyle=":", label="ACC") - - -def train_and_plot_pacc(ax: plt.Axes, training: LabelledCollection, test: np.ndarray, n_classes: int) -> None: - estimate = _get_estimate(PACC, training, test) - ax.plot(np.arange(n_classes), estimate, c="limegreen", linewidth=2, linestyle=":", label="PACC") - - -def plot_true_proportions(ax: plt.Axes, test_labels: np.ndarray, n_classes: int) -> None: +def plot_true_proportions(ax: plt.Axes, test_prevalence: np.ndarray) -> None: """Plots the true proportions.""" - counts = np.bincount(test_labels, minlength=n_classes) - proportion = counts / counts.sum() - + n_classes = len(test_prevalence) x_ax = np.arange(n_classes) - ax.plot(x_ax, proportion, c="black", linewidth=2, label="True") + ax.plot(x_ax, test_prevalence, c="black", 
linewidth=2, label="True") ax.set_xlabel("Class") ax.set_ylabel("Prevalence") @@ -171,11 +111,59 @@ def plot_true_proportions(ax: plt.Axes, test_labels: np.ndarray, n_classes: int) ax.set_ylim(-0.01, 1.01) +def get_random_forest() -> RandomForestClassifier: + """An auxiliary factory method to generate a random forest.""" + return RandomForestClassifier(n_estimators=10, random_state=5) + + +def _get_estimate(estimator_class, training: LabelledCollection, test: np.ndarray) -> None: + """Auxiliary method for running ACC and PACC.""" + estimator = estimator_class(get_random_forest()) + estimator.fit(training) + return estimator.quantify(test) + + +def train_and_plot_bayesian_quantification(ax: plt.Axes, training: LabelledCollection, test: LabelledCollection) -> None: + """Fits Bayesian quantification and plots posterior mean as well as individual samples""" + print('training model Bayesian CC...', end='') + quantifier = BayesianCC(classifier=get_random_forest()) + quantifier.fit(training) + + # Obtain mean prediction + mean_prediction = quantifier.quantify(test.X) + mae = qp.error.mae(test.prevalence(), mean_prediction) + x_ax = np.arange(training.n_classes) + ax.plot(x_ax, mean_prediction, c="salmon", linewidth=2, linestyle=":", label="Bayesian") + + # Obtain individual samples + samples = quantifier.get_prevalence_samples() + for sample in samples[::5, :]: + ax.plot(x_ax, sample, c="salmon", alpha=0.1, linewidth=0.3, rasterized=True) + print(f'MAE={mae:.4f} [done]') + + +def train_and_plot_acc(ax: plt.Axes, training: LabelledCollection, test: LabelledCollection) -> None: + print('training model ACC...', end='') + estimate = _get_estimate(ACC, training, test.X) + mae = qp.error.mae(test.prevalence(), estimate) + ax.plot(np.arange(training.n_classes), estimate, c="darkblue", linewidth=2, linestyle=":", label="ACC") + print(f'MAE={mae:.4f} [done]') + + +def train_and_plot_pacc(ax: plt.Axes, training: LabelledCollection, test: LabelledCollection) -> None: + print('training model PACC...', end='') + estimate = _get_estimate(PACC, training, test.X) + mae = qp.error.mae(test.prevalence(), estimate) + ax.plot(np.arange(training.n_classes), estimate, c="limegreen", linewidth=2, linestyle=":", label="PACC") + print(f'MAE={mae:.4f} [done]') + def main() -> None: # --- Simulate data --- + print('generating simulated data') rng = np.random.default_rng(42) data = simulate_data(rng) + training, test = data.train_test # --- Plot simulated data --- fig, axs = plt.subplots(1, 4, figsize=(13, 3), dpi=300) @@ -185,17 +173,19 @@ def main() -> None: # --- Plot quantification results --- ax = axs[3] - plot_true_proportions(ax, test_labels=data.Y_test, n_classes=data.n_classes) - - training = LabelledCollection(data.X_train, data.Y_train) - train_and_plot_acc(ax, training=training, test=data.X_test, n_classes=data.n_classes) - train_and_plot_pacc(ax, training=training, test=data.X_test, n_classes=data.n_classes) - train_and_plot_bayesian_quantification(ax=ax, training=training, test=data.X_test, n_classes=data.n_classes) + plot_true_proportions(ax, test_prevalence=test.prevalence()) + + train_and_plot_acc(ax, training=training, test=test) + train_and_plot_pacc(ax, training=training, test=test) + train_and_plot_bayesian_quantification(ax=ax, training=training, test=test) + print('[done]') ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', frameon=False) + print(f'saving plot in path {FIGURE_PATH}...', end='') fig.tight_layout() fig.savefig(FIGURE_PATH) + print('[done]') if __name__ == '__main__': From 
36ac6db27d759e05cf07b106acb9b9ca4bc35470 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 18 Mar 2024 23:39:55 +0100 Subject: [PATCH 7/8] fixing doc --- TODO.txt | 2 + docs/Makefile | 20 + docs/build/html/_static/basic.css | 27 +- docs/build/html/_static/doctools.js | 2 +- .../html/_static/documentation_options.js | 5 +- docs/build/html/_static/language_data.js | 2 +- docs/build/html/_static/searchtools.js | 34 +- docs/build/html/genindex.html | 83 ++- docs/build/html/index.html | 58 +- docs/build/html/modules.html | 27 +- docs/build/html/objects.inv | Bin 3343 -> 3505 bytes docs/build/html/py-modindex.html | 19 +- docs/build/html/quapy.classification.html | 150 ++-- docs/build/html/quapy.data.html | 145 ++-- docs/build/html/quapy.html | 460 ++++++++---- docs/build/html/quapy.method.html | 691 +++++++++++------- docs/build/html/search.html | 19 +- docs/build/html/searchindex.js | 2 +- docs/make.bat | 35 + docs/source/conf.py | 55 ++ docs/source/index.rst | 41 ++ docs/source/modules.rst | 7 + docs/source/quapy.classification.rst | 45 ++ docs/source/quapy.data.rst | 46 ++ docs/source/quapy.method.rst | 61 ++ docs/source/quapy.rst | 80 ++ examples/bayesian_quantification.py | 2 + quapy/functional.py | 160 ++-- quapy/method/aggregative.py | 291 ++++---- 29 files changed, 1714 insertions(+), 855 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/modules.rst create mode 100644 docs/source/quapy.classification.rst create mode 100644 docs/source/quapy.data.rst create mode 100644 docs/source/quapy.method.rst create mode 100644 docs/source/quapy.rst diff --git a/TODO.txt b/TODO.txt index d3f2b3d..6547a5b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,3 +1,5 @@ +check sphinks doc for enumerations (for example, the doc for ACC) + ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now works with protocols it would need to know the validation set in order to pass something like "protocol: APP(val, etc.)" diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/build/html/_static/basic.css b/docs/build/html/_static/basic.css index f316efc..4e9a9f1 100644 --- a/docs/build/html/_static/basic.css +++ b/docs/build/html/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ @@ -237,10 +237,6 @@ a.headerlink { visibility: hidden; } -a:visited { - color: #551A8B; -} - h1:hover > a.headerlink, h2:hover > a.headerlink, h3:hover > a.headerlink, @@ -328,7 +324,6 @@ aside.sidebar { p.sidebar-title { font-weight: bold; } - nav.contents, aside.topic, div.admonition, div.topic, blockquote { @@ -336,7 +331,6 @@ div.admonition, div.topic, blockquote { } /* -- topics ---------------------------------------------------------------- */ - nav.contents, aside.topic, div.topic { @@ -612,7 +606,6 @@ ol.simple p, ul.simple p { margin-bottom: 0; } - aside.footnote > span, div.citation > span { float: left; @@ -674,16 +667,6 @@ dd { margin-left: 30px; } -.sig dd { - margin-top: 0px; - margin-bottom: 0px; -} - -.sig dl { - margin-top: 0px; - margin-bottom: 0px; -} - dl > dd:last-child, dl > dd:last-child > :last-child { margin-bottom: 0; @@ -752,14 +735,6 @@ abbr, acronym { cursor: help; } -.translated { - background-color: rgba(207, 255, 207, 0.2) -} - -.untranslated { - background-color: rgba(255, 207, 207, 0.2) -} - /* -- code displays --------------------------------------------------------- */ pre { diff --git a/docs/build/html/_static/doctools.js b/docs/build/html/_static/doctools.js index 4d67807..527b876 100644 --- a/docs/build/html/_static/doctools.js +++ b/docs/build/html/_static/doctools.js @@ -4,7 +4,7 @@ * * Base JavaScript utilities for all Sphinx HTML documentation. * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/build/html/_static/documentation_options.js b/docs/build/html/_static/documentation_options.js index 4099efb..17fd07e 100644 --- a/docs/build/html/_static/documentation_options.js +++ b/docs/build/html/_static/documentation_options.js @@ -1,5 +1,6 @@ -const DOCUMENTATION_OPTIONS = { - VERSION: '0.1.8', +var DOCUMENTATION_OPTIONS = { + URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), + VERSION: '0.1.9', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/docs/build/html/_static/language_data.js b/docs/build/html/_static/language_data.js index 017600c..2e22b06 100644 --- a/docs/build/html/_static/language_data.js +++ b/docs/build/html/_static/language_data.js @@ -5,7 +5,7 @@ * This script contains the language-specific data used by searchtools.js, * namely the list of stopwords, stemmer, scorer and splitter. * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/docs/build/html/_static/searchtools.js b/docs/build/html/_static/searchtools.js index 8bb1af5..e89e34d 100644 --- a/docs/build/html/_static/searchtools.js +++ b/docs/build/html/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ @@ -57,12 +57,12 @@ const _removeChildren = (element) => { const _escapeRegExp = (string) => string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string -const _displayItem = (item, searchTerms, highlightTerms) => { +const _displayItem = (item, searchTerms) => { const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docUrlRoot = DOCUMENTATION_OPTIONS.URL_ROOT; const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; - const contentRoot = document.documentElement.dataset.content_root; const [docName, title, anchor, descr, score, _filename] = item; @@ -75,24 +75,20 @@ const _displayItem = (item, searchTerms, highlightTerms) => { if (dirname.match(/\/index\/$/)) dirname = dirname.substring(0, dirname.length - 6); else if (dirname === "index/") dirname = ""; - requestUrl = contentRoot + dirname; + requestUrl = docUrlRoot + dirname; linkUrl = requestUrl; } else { // normal html builders - requestUrl = contentRoot + docName + docFileSuffix; + requestUrl = docUrlRoot + docName + docFileSuffix; linkUrl = docName + docLinkSuffix; } let linkEl = listItem.appendChild(document.createElement("a")); linkEl.href = linkUrl + anchor; linkEl.dataset.score = score; linkEl.innerHTML = title; - if (descr) { + if (descr) listItem.appendChild(document.createElement("span")).innerHTML = " (" + descr + ")"; - // highlight search terms in the description - if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js - highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); - } else if (showSearchSummary) fetch(requestUrl) .then((responseData) => responseData.text()) @@ -101,9 +97,6 @@ const _displayItem = (item, searchTerms, highlightTerms) => { listItem.appendChild( Search.makeSearchSummary(data, searchTerms) ); - // highlight search terms in the summary - if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js - highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); }); Search.output.appendChild(listItem); }; @@ -122,15 +115,14 @@ const _finishSearch = (resultCount) => { const _displayNextItem = ( results, resultCount, - searchTerms, - highlightTerms, + searchTerms ) => { // results left, load the summary and display it // this is intended to be dynamic (don't sub resultsCount) if (results.length) { - _displayItem(results.pop(), searchTerms, highlightTerms); + _displayItem(results.pop(), searchTerms); setTimeout( - () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + () => _displayNextItem(results, resultCount, searchTerms), 5 ); } @@ -164,7 +156,7 @@ const Search = { const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); const docContent = htmlElement.querySelector('[role="main"]'); - if (docContent) return docContent.textContent; + if (docContent !== undefined) return docContent.textContent; console.warn( "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." 
); @@ -288,9 +280,9 @@ const Search = { let results = []; _removeChildren(document.getElementById("search-progress")); - const queryLower = query.toLowerCase().trim(); + const queryLower = query.toLowerCase(); for (const [title, foundTitles] of Object.entries(allTitles)) { - if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { let score = Math.round(100 * queryLower.length / title.length) results.push([ @@ -368,7 +360,7 @@ const Search = { // console.info("search results:", Search.lastresults); // print the results - _displayNextItem(results, results.length, searchTerms, highlightTerms); + _displayNextItem(results, results.length, searchTerms); }, /** diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index c2451c9..9b2dce2 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -1,22 +1,23 @@ - + - Index — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation - - + Index — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation + + - - - - - + + + + + + @@ -136,6 +137,8 @@
  • (quapy.method.aggregative.ACC method)
  • (quapy.method.aggregative.AggregativeQuantifier method) +
  • +
  • (quapy.method.aggregative.BayesianCC method)
  • (quapy.method.aggregative.CC method)
  • @@ -174,6 +177,8 @@
  • (quapy.method.aggregative.ACC method)
  • (quapy.method.aggregative.AggregativeQuantifier method) +
  • +
  • (quapy.method.aggregative.BayesianCC method)
  • (quapy.method.aggregative.CC method)
  • @@ -221,6 +226,8 @@
  • BANDWIDTH_METHOD (quapy.method._kdey.KDEBase attribute)
  • BaseQuantifier (class in quapy.method.base) +
  • +
  • BayesianCC (class in quapy.method.aggregative)
  • BCTSCalibration (class in quapy.classification.calibration)
  • @@ -284,11 +291,15 @@
  • ClassifyAndCount (in module quapy.method.aggregative)
  • - - +
      +
    • clip_prevalence() (in module quapy.functional) +
    • +
    • CLIPPING (quapy.method.aggregative.ACC attribute)
    • CNNnet (class in quapy.classification.neural)
    • @@ -309,6 +320,8 @@
    • ConfigStatus (class in quapy.model_selection)
    • counts() (quapy.data.base.LabelledCollection method) +
    • +
    • counts_from_labels() (in module quapy.functional)
    • create_if_not_exist() (in module quapy.util)
    • @@ -472,6 +485,8 @@
    • (quapy.method.non_aggregative.DMx method)
    • (quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation method) +
    • +
    • (quapy.method.non_aggregative.ReadMe method)
    • (quapy.model_selection.GridSearchQ method)
    • @@ -505,6 +520,8 @@
      +
    • sample_from_posterior() (quapy.method.aggregative.BayesianCC method) +
    • samples_parameters() (quapy.protocol.AbstractStochasticSeededProtocol method)
    • Status (class in quapy.model_selection) +
    • +
    • std_constrained_linear_ls() (quapy.method.non_aggregative.ReadMe method)
    • strprev() (in module quapy.functional)
    • @@ -1261,6 +1298,16 @@
      • train_test (quapy.data.base.Dataset property)
      • +
      • training (quapy.classification.neural.CNNnet attribute) + +
      • transform() (quapy.classification.methods.LowRankLogisticRegression method)
          diff --git a/docs/build/html/index.html b/docs/build/html/index.html index 89d92c8..7d09502 100644 --- a/docs/build/html/index.html +++ b/docs/build/html/index.html @@ -1,24 +1,24 @@ - + - Welcome to QuaPy’s documentation! — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation - - + Welcome to QuaPy’s documentation! — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation + + - - - - - - + + + + + + @@ -73,21 +73,21 @@
          -

          Welcome to QuaPy’s documentation!

          +

          Welcome to QuaPy’s documentation!

          QuaPy is a Python-based open-source framework for quantification.

          This document contains the API of the modules included in QuaPy.

          -

          Installation

          +

          Installation

          pip install quapy

          -

          GitHub

          +

          GitHub

          QuaPy is hosted in GitHub at https://github.com/HLT-ISTI/QuaPy

          -

          Contents

          +

          Contents

          -

          Indices and tables

          +

          Indices and tables

          • Index

          • Module Index

          • diff --git a/docs/build/html/modules.html b/docs/build/html/modules.html index 4942493..96bd5b3 100644 --- a/docs/build/html/modules.html +++ b/docs/build/html/modules.html @@ -1,24 +1,24 @@ - + - quapy — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation - - + quapy — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation + + - - - - - - + + + + + + @@ -77,7 +77,7 @@
            -

            quapy

            +

            quapy

            • quapy package
                @@ -157,15 +157,19 @@
              • argmin_prevalence()
              • as_binary_prevalence()
              • check_prevalence_vector()
              • +
              • clip_prevalence()
              • +
              • counts_from_labels()
              • get_divergence()
              • get_nprevpoints_approximation()
              • linear_search()
              • +
              • map_onto_probability_simplex()
              • normalize_prevalence()
              • num_prevalence_combinations()
              • optim_minimize()
              • prevalence_from_labels()
              • prevalence_from_probabilities()
              • prevalence_linspace()
              • +
              • solve_adjustment()
              • strprev()
              • uniform_prevalence_sampling()
              • uniform_simplex_sampling()
              • @@ -267,6 +271,7 @@
              • get_quapy_home()
              • map_parallel()
              • parallel()
              • +
              • parallel_unpack()
              • pickled_resource()
              • save_text_file()
              • temp_seed()
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv index 35f168130d4d3c1e21a165886f1409a2edba1787..c545ad3f0d4b554cf7e42ca2a2c3efe7aff55be4 100644
GIT binary patch (delta 3364 / delta 3200 — regenerated Sphinx object inventory; base85 payload omitted, along with the swallowed header of the Python Module Index page diff, whose title is bumped from "… 0.1.8 documentation" to "… 0.1.9 documentation")
diff --git a/docs/build/html/quapy.classification.html b/docs/build/html/quapy.classification.html index b181a3b..95da4d7 100644
--- a/docs/build/html/quapy.classification.html
+++ b/docs/build/html/quapy.classification.html
@@ -1,23 +1,24 @@
-quapy.classification package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+quapy.classification package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
(stylesheet and script references in the HTML head updated)
@@ -95,15 +96,15 @@
                -

                quapy.classification package

                +

                quapy.classification package

                -

                Submodules

                +

                Submodules

                -

                quapy.classification.calibration module

                +

                quapy.classification.calibration module

                -class quapy.classification.calibration.BCTSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
                +class quapy.classification.calibration.BCTSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]

                Bases: RecalibratedProbabilisticClassifierBase

                Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in Alexandari et al. paper:

                @@ -124,7 +125,7 @@ training set afterwards. Default value is 5.
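For orientation, here is a minimal usage sketch of these calibration wrappers; the synthetic data and the choice of LogisticRegression are illustrative assumptions, and the abstention dependency must be installed:

```python
# Minimal sketch (illustrative data): wrapping an sklearn probabilistic
# classifier with BCTS calibration via the fit/predict_proba API documented here.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from quapy.classification.calibration import BCTSCalibration

X, y = make_classification(n_samples=1000, random_state=0)
calibrated = BCTSCalibration(LogisticRegression(), val_split=5)  # 5-fold cross-validation
calibrated.fit(X, y)
posteriors = calibrated.predict_proba(X)  # calibrated posterior probabilities
```

The same pattern applies to NBVSCalibration, TSCalibration, and VSCalibration below, which differ only in the abstention calibrator they instantiate.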

                -class quapy.classification.calibration.NBVSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
                +class quapy.classification.calibration.NBVSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]

                Bases: RecalibratedProbabilisticClassifierBase

                Applies the No-Bias Vector Scaling (NBVS) calibration method from abstention.calibration, as defined in Alexandari et al. paper:

                @@ -145,7 +146,7 @@ training set afterwards. Default value is 5.

                -class quapy.classification.calibration.RecalibratedProbabilisticClassifier[source]
                +class quapy.classification.calibration.RecalibratedProbabilisticClassifier[source]

                Bases: object

Abstract class for (re)calibration methods from abstention.calibration, as defined in Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration @@ -154,7 +155,7 @@ is hard-to-beat at label shift adaptation. In International Conference on Machine Learning

                -class quapy.classification.calibration.RecalibratedProbabilisticClassifierBase(classifier, calibrator, val_split=5, n_jobs=None, verbose=False)[source]
                +class quapy.classification.calibration.RecalibratedProbabilisticClassifierBase(classifier, calibrator, val_split=5, n_jobs=None, verbose=False)[source]

                Bases: BaseEstimator, RecalibratedProbabilisticClassifier

                Applies a (re)calibration method from abstention.calibration, as defined in Alexandari et al. paper.

                @@ -174,7 +175,7 @@ training set afterwards. Default value is 5.

                -property classes_
                +property classes_

                Returns the classes on which the classifier has been trained on

                Returns:
                @@ -185,7 +186,7 @@ training set afterwards. Default value is 5.

                -fit(X, y)[source]
                +fit(X, y)[source]

                Fits the calibration for the probabilistic classifier.

                Parameters:
                @@ -202,7 +203,7 @@ training set afterwards. Default value is 5.

                -fit_cv(X, y)[source]
                +fit_cv(X, y)[source]

                Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all training instances via cross-validation, and then retrains the classifier on all training instances. The posterior probabilities thus generated are used for calibrating the outputs of the classifier.

                @@ -221,7 +222,7 @@ The posterior probabilities thus generated are used for calibrating the outputs
                -fit_tr_val(X, y)[source]
                +fit_tr_val(X, y)[source]

Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a training and a validation set, and then uses the training samples to learn a classifier which is then used to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate @@ -241,7 +242,7 @@ the classifier. The classifier is not retrained on the whole dataset.

                -predict(X)[source]
                +predict(X)[source]

                Predicts class labels for the data instances in X

                Parameters:
                @@ -255,7 +256,7 @@ the classifier. The classifier is not retrained on the whole dataset.

                -predict_proba(X)[source]
                +predict_proba(X)[source]

                Generates posterior probabilities for the data instances in X

                Parameters:
                @@ -271,7 +272,7 @@ the classifier. The classifier is not retrained on the whole dataset.

                -class quapy.classification.calibration.TSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
                +class quapy.classification.calibration.TSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]

                Bases: RecalibratedProbabilisticClassifierBase

                Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in Alexandari et al. paper:

                @@ -292,7 +293,7 @@ training set afterwards. Default value is 5.

                -class quapy.classification.calibration.VSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]
                +class quapy.classification.calibration.VSCalibration(classifier, val_split=5, n_jobs=None, verbose=False)[source]

                Bases: RecalibratedProbabilisticClassifierBase

                Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in Alexandari et al. paper:

                @@ -313,10 +314,10 @@ training set afterwards. Default value is 5.

                -

                quapy.classification.methods module

                +

                quapy.classification.methods module

                -class quapy.classification.methods.LowRankLogisticRegression(n_components=100, **kwargs)[source]
                +class quapy.classification.methods.LowRankLogisticRegression(n_components=100, **kwargs)[source]

                Bases: BaseEstimator

                An example of a classification method (i.e., an object that implements fit, predict, and predict_proba) that also generates embedded inputs (i.e., that implements transform), as those required for @@ -335,7 +336,7 @@ while classification is performed using

                -fit(X, y)[source]
                +fit(X, y)[source]

                Fit the model according to the given training data. The fit consists of fitting TruncatedSVD and then LogisticRegression on the low-rank representation.

                @@ -353,7 +354,7 @@ fitting TruncatedSVD and then LogisticRegression on th
                -get_params()[source]
                +get_params()[source]

                Get hyper-parameters for this estimator.

                Returns:
                @@ -364,7 +365,7 @@ fitting TruncatedSVD and then LogisticRegression on th
                -predict(X)[source]
                +predict(X)[source]

                Predicts labels for the instances X embedded into the low-rank space.

                Parameters:
                @@ -379,7 +380,7 @@ instances in X

                -predict_proba(X)[source]
                +predict_proba(X)[source]

                Predicts posterior probabilities for the instances X embedded into the low-rank space.

                Parameters:
                @@ -393,7 +394,7 @@ instances in X

                -set_params(**params)[source]
                +set_params(**params)[source]

                Set the parameters of this estimator.

                Parameters:
                @@ -406,7 +407,7 @@ and eventually also n_components for TruncatedSVD

                -transform(X)[source]
                +transform(X)[source]

                Returns the low-rank approximation of X with n_components dimensions, or X unaltered if n_components >= X.shape[1].

                @@ -423,10 +424,10 @@ and eventually also n_components for TruncatedSVD
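A short usage sketch of this class (the random dense data is an illustrative assumption; the typical use case is high-dimensional text features):

```python
# Minimal sketch: logistic regression over a truncated-SVD projection.
from sklearn.datasets import make_classification
from quapy.classification.methods import LowRankLogisticRegression

X, y = make_classification(n_samples=500, n_features=300, n_informative=50, random_state=0)
clf = LowRankLogisticRegression(n_components=20)  # 20 components is an illustrative choice
clf.fit(X, y)                   # fits TruncatedSVD, then LogisticRegression on the projection
posteriors = clf.predict_proba(X)
Z = clf.transform(X)            # shape (n_samples, 20); X is returned unaltered if n_components >= X.shape[1]
```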

                -

                quapy.classification.neural module

                +

                quapy.classification.neural module

                -class quapy.classification.neural.CNNnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5)[source]
                +class quapy.classification.neural.CNNnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, kernel_heights=[3, 5, 7], stride=1, padding=0, drop_p=0.5)[source]

                Bases: TextClassifierNet

                An implementation of quapy.classification.neural.TextClassifierNet based on Convolutional Neural Networks.

                @@ -448,7 +449,7 @@ consecutive tokens that each kernel covers

                -document_embedding(input)[source]
                +document_embedding(input)[source]

                Embeds documents (i.e., performs the forward pass up to the next-to-last layer).

                @@ -466,7 +467,7 @@ dimensionality of the embedding

                -get_params()[source]
                +get_params()[source]

                Get hyper-parameters for this estimator

                Returns:
                @@ -475,9 +476,14 @@ dimensionality of the embedding

                +
                +
                +training: bool
                +
                +
                -property vocabulary_size
                +property vocabulary_size

                Return the size of the vocabulary

                Returns:
                @@ -490,7 +496,7 @@ dimensionality of the embedding

                -class quapy.classification.neural.LSTMnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, drop_p=0.5)[source]
                +class quapy.classification.neural.LSTMnet(vocabulary_size, n_classes, embedding_size=100, hidden_size=256, repr_size=100, lstm_class_nlayers=1, drop_p=0.5)[source]

                Bases: TextClassifierNet

                An implementation of quapy.classification.neural.TextClassifierNet based on Long Short Term Memory networks.

                @@ -509,7 +515,7 @@ Long Short Term Memory networks.

                -document_embedding(x)[source]
                +document_embedding(x)[source]

                Embeds documents (i.e., performs the forward pass up to the next-to-last layer).

                @@ -527,7 +533,7 @@ dimensionality of the embedding

                -get_params()[source]
                +get_params()[source]

                Get hyper-parameters for this estimator

                Returns:
                @@ -536,9 +542,14 @@ dimensionality of the embedding

                +
                +
                +training: bool
                +
                +
                -property vocabulary_size
                +property vocabulary_size

                Return the size of the vocabulary

                Returns:
                @@ -551,7 +562,7 @@ dimensionality of the embedding

                -class quapy.classification.neural.NeuralClassifierTrainer(net: TextClassifierNet, lr=0.001, weight_decay=0, patience=10, epochs=200, batch_size=64, batch_size_test=512, padding_length=300, device='cuda', checkpointpath='../checkpoint/classifier_net.dat')[source]
                +class quapy.classification.neural.NeuralClassifierTrainer(net: TextClassifierNet, lr=0.001, weight_decay=0, patience=10, epochs=200, batch_size=64, batch_size_test=512, padding_length=300, device='cuda', checkpointpath='../checkpoint/classifier_net.dat')[source]

                Bases: object

                Trains a neural network for text classification.

                @@ -574,7 +585,7 @@ according to the evaluation in the held-out validation split (default ‘../chec
                -property device
                +property device

                Gets the device in which the network is allocated

                Returns:
                @@ -585,7 +596,7 @@ according to the evaluation in the held-out validation split (default ‘../chec
                -fit(instances, labels, val_split=0.3)[source]
                +fit(instances, labels, val_split=0.3)[source]

                Fits the model according to the given training data.

                Parameters:
                @@ -603,7 +614,7 @@ according to the evaluation in the held-out validation split (default ‘../chec
                -get_params()[source]
                +get_params()[source]

                Get hyper-parameters for this estimator

                Returns:
                @@ -614,7 +625,7 @@ according to the evaluation in the held-out validation split (default ‘../chec
                -predict(instances)[source]
                +predict(instances)[source]

                Predicts labels for the instances

                Parameters:
                @@ -629,7 +640,7 @@ instances in X

                -predict_proba(instances)[source]
                +predict_proba(instances)[source]

                Predicts posterior probabilities for the instances

                Parameters:
                @@ -643,7 +654,7 @@ instances in X

                -reset_net_params(vocab_size, n_classes)[source]
                +reset_net_params(vocab_size, n_classes)[source]

                Reinitialize the network parameters

                Parameters:
                @@ -657,7 +668,7 @@ instances in X

                -set_params(**params)[source]
                +set_params(**params)[source]

                Set the parameters of this trainer and the learner it is training. In this current version, parameter names for the trainer and learner should be disjoint.

                @@ -670,7 +681,7 @@ be disjoint.

                -transform(instances)[source]
                +transform(instances)[source]

                Returns the embeddings of the instances

                Parameters:
                @@ -687,12 +698,12 @@ where embed_size is defined by the classification network
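A compact sketch combining a network and the trainer; the random token ids stand in for indexed documents, and device='cpu' is an assumption made so the sketch runs without a GPU:

```python
# Minimal sketch: training a CNN text classifier on placeholder token ids.
import numpy as np
from quapy.classification.neural import CNNnet, NeuralClassifierTrainer

vocabulary_size, n_classes = 5000, 2
net = CNNnet(vocabulary_size, n_classes, embedding_size=100, repr_size=100)
trainer = NeuralClassifierTrainer(net, lr=1e-3, device='cpu')

instances = [np.random.randint(0, vocabulary_size, size=50).tolist() for _ in range(200)]
labels = np.random.randint(0, n_classes, size=200)
trainer.fit(instances, labels, val_split=0.3)  # early-stops on the held-out validation split
posteriors = trainer.predict_proba(instances)
```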

                -class quapy.classification.neural.TextClassifierNet(*args, **kwargs)[source]
                +class quapy.classification.neural.TextClassifierNet(*args, **kwargs)[source]

                Bases: Module

                Abstract Text classifier (torch.nn.Module)

                -dimensions()[source]
                +dimensions()[source]

                Gets the number of dimensions of the embedding space

                Returns:
                @@ -703,7 +714,7 @@ where embed_size is defined by the classification network

                -abstract document_embedding(x)[source]
                +abstract document_embedding(x)[source]

                Embeds documents (i.e., performs the forward pass up to the next-to-last layer).

                @@ -721,7 +732,7 @@ dimensionality of the embedding

                -forward(x)[source]
                +forward(x)[source]

                Performs the forward pass.

                Parameters:
                @@ -737,7 +748,7 @@ for each of the instances and classes

                -abstract get_params()[source]
                +abstract get_params()[source]

                Get hyper-parameters for this estimator

                Returns:
                @@ -748,7 +759,7 @@ for each of the instances and classes

                -predict_proba(x)[source]
                +predict_proba(x)[source]

                Predicts posterior probabilities for the instances in x

                Parameters:
                @@ -762,9 +773,14 @@ is length of the pad in the batch

                +
                +
                +training: bool
                +
                +
                -property vocabulary_size
                +property vocabulary_size

                Return the size of the vocabulary

                Returns:
                @@ -775,7 +791,7 @@ is length of the pad in the batch

                -xavier_uniform()[source]
                +xavier_uniform()[source]

                Performs Xavier initialization of the network parameters

                @@ -783,7 +799,7 @@ is length of the pad in the batch

                -class quapy.classification.neural.TorchDataset(instances, labels=None)[source]
                +class quapy.classification.neural.TorchDataset(instances, labels=None)[source]

                Bases: Dataset

Transforms labelled instances into a torch.utils.data.DataLoader object

                @@ -796,7 +812,7 @@ is length of the pad in the batch

                -asDataloader(batch_size, shuffle, pad_length, device)[source]
                +asDataloader(batch_size, shuffle, pad_length, device)[source]

                Converts the labelled collection into a Torch DataLoader with dynamic padding for the batch

                @@ -820,10 +836,10 @@ applied, meaning that if the longest document in the batch is shorter than
                -

                quapy.classification.svmperf module

                +

                quapy.classification.svmperf module

                -class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None)[source]
                +class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None)[source]

                Bases: BaseEstimator, ClassifierMixin

                A wrapper for the SVM-perf package by Thorsten Joachims. When using losses for quantification, the source code has to be patched. See @@ -848,7 +864,7 @@ for further details.

                -decision_function(X, y=None)[source]
                +decision_function(X, y=None)[source]

                Evaluate the decision function for the samples in X.

                Parameters:
                @@ -865,7 +881,7 @@ for further details.

                -fit(X, y)[source]
                +fit(X, y)[source]

                Trains the SVM for the multivariate performance loss

                Parameters:
                @@ -882,7 +898,7 @@ for further details.

                -predict(X)[source]
                +predict(X)[source]

                Predicts labels for the instances X

                Parameters:
                @@ -897,14 +913,14 @@ instances in X

                -valid_losses = {'01': 0, 'f1': 1, 'kld': 12, 'mae': 26, 'mrae': 27, 'nkld': 13, 'q': 22, 'qacc': 23, 'qf1': 24, 'qgm': 25}
                +valid_losses = {'01': 0, 'f1': 1, 'kld': 12, 'mae': 26, 'mrae': 27, 'nkld': 13, 'q': 22, 'qacc': 23, 'qf1': 24, 'qgm': 25}
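Because this wrapper shells out to the external SVM-perf binaries, a self-contained example cannot run as-is; the sketch below only indicates the intended call pattern, with '/path/to/svm_perf' as a placeholder for a local installation of the patched package:

```python
# Minimal sketch: SVMperf with a quantification-oriented loss ('kld', code 12
# in valid_losses above). The svmperf_base path is a placeholder.
from quapy.classification.svmperf import SVMperf

svm = SVMperf(svmperf_base='/path/to/svm_perf', C=0.01, loss='kld')
# svm.fit(X, y)                      # X: feature matrix, y: labels
# scores = svm.decision_function(X)  # margin scores for the instances in X
```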
                -

                Module contents

                +

                Module contents

diff --git a/docs/build/html/quapy.data.html b/docs/build/html/quapy.data.html index fd7a730..0f0f06f 100644
--- a/docs/build/html/quapy.data.html
+++ b/docs/build/html/quapy.data.html
@@ -1,23 +1,24 @@
-quapy.data package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+quapy.data package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
(stylesheet and script references in the HTML head updated)
@@ -95,15 +96,15 @@
                -

                quapy.data package

                +

                quapy.data package

                -

                Submodules

                +

                Submodules

                -

                quapy.data.base module

                +

                quapy.data.base module

                -class quapy.data.base.Dataset(training: LabelledCollection, test: LabelledCollection, vocabulary: dict | None = None, name='')[source]
                +class quapy.data.base.Dataset(training: LabelledCollection, test: LabelledCollection, vocabulary: Optional[dict] = None, name='')[source]

                Bases: object

                Abstraction of training and test LabelledCollection objects.

                @@ -118,7 +119,7 @@
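A small sketch of constructing a Dataset directly from two labelled collections (the random arrays are illustrative):

```python
# Minimal sketch: building a Dataset from training/test LabelledCollections.
import numpy as np
from quapy.data.base import Dataset, LabelledCollection

X_tr, y_tr = np.random.rand(100, 2), np.random.randint(0, 2, size=100)
X_te, y_te = np.random.rand(50, 2), np.random.randint(0, 2, size=50)
data = Dataset(LabelledCollection(X_tr, y_tr), LabelledCollection(X_te, y_te), name='toy')
print(data.n_classes, data.classes_)  # 2 [0 1]
```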
                -classmethod SplitStratified(collection: LabelledCollection, train_size=0.6)[source]
                +classmethod SplitStratified(collection: LabelledCollection, train_size=0.6)[source]

                Generates a Dataset from a stratified split of a LabelledCollection instance. See LabelledCollection.split_stratified()

                @@ -136,7 +137,7 @@ See
                -property binary
                +property binary

                Returns True if the training collection is labelled according to two classes

                Returns:
                @@ -147,7 +148,7 @@ See
                -property classes_
                +property classes_

                The classes according to which the training collection is labelled

                Returns:
                @@ -158,7 +159,7 @@ See
                -classmethod kFCV(data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0)[source]
                +classmethod kFCV(data: LabelledCollection, nfolds=5, nrepeats=1, random_state=0)[source]

                Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around LabelledCollection.kFCV() that returns Dataset instances made of training and test folds.

                @@ -177,7 +178,7 @@ See
                -classmethod load(train_path, test_path, loader_func: callable, classes=None, **loader_kwargs)[source]
                +classmethod load(train_path, test_path, loader_func: callable, classes=None, **loader_kwargs)[source]

Loads a training and a test labelled set of data and converts them into a Dataset instance. The function in charge of reading the instances must be specified. This function can be a custom one, or any of the reading functions defined in quapy.data.reader module.

                @@ -201,7 +202,7 @@ See
                -property n_classes
                +property n_classes

                The number of classes according to which the training collection is labelled

                Returns:
                @@ -212,7 +213,7 @@ See
                -reduce(n_train=100, n_test=100)[source]
                +reduce(n_train=100, n_test=100)[source]

                Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.

                Parameters:
                @@ -229,7 +230,7 @@ See
                -stats(show=True)[source]
                +stats(show=True)[source]

                Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:

                >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
                 >>> data.stats()
                @@ -252,7 +253,7 @@ the collection), prevs (the prevalence values for each class)

                -property train_test
                +property train_test

                Alias to self.training and self.test

                Returns:
                @@ -266,7 +267,7 @@ the collection), prevs (the prevalence values for each class)

                -property vocabulary_size
                +property vocabulary_size

                If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary

                Returns:
                @@ -279,7 +280,7 @@ the collection), prevs (the prevalence values for each class)

                -class quapy.data.base.LabelledCollection(instances, labels, classes=None)[source]
                +class quapy.data.base.LabelledCollection(instances, labels, classes=None)[source]

                Bases: object

                A LabelledCollection is a set of objects each with a label attached to each of them. This class implements several sampling routines and other utilities.

                @@ -296,7 +297,7 @@ from the labels. The classes must be indicated in cases in which some of the lab
                -property X
                +property X

                An alias to self.instances

                Returns:
                @@ -307,7 +308,7 @@ from the labels. The classes must be indicated in cases in which some of the lab
                -property Xp
                +property Xp

                Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from a LabelledCollection object.

                @@ -319,7 +320,7 @@ a
                -property Xy
                +property Xy

                Gets the instances and labels. This is useful when working with sklearn estimators, e.g.:

                >>> svm = LinearSVC().fit(*my_collection.Xy)
                 
                @@ -333,7 +334,7 @@ a
                -property binary
                +property binary

                Returns True if the number of classes is 2

                Returns:
                @@ -344,7 +345,7 @@ a
                -counts()[source]
                +counts()[source]

                Returns the number of instances for each of the classes in the codeframe.

                Returns:
                @@ -356,7 +357,7 @@ as listed by self.classes_

                -classmethod join(*args: Iterable[LabelledCollection])[source]
                +classmethod join(*args: Iterable[LabelledCollection])[source]

                Returns a new LabelledCollection as the union of the collections given in input.

                Parameters:
                @@ -370,7 +371,7 @@ as listed by self.classes_

                -kFCV(nfolds=5, nrepeats=1, random_state=None)[source]
                +kFCV(nfolds=5, nrepeats=1, random_state=None)[source]

                Generator of stratified folds to be used in k-fold cross validation.

                Parameters:
                @@ -388,7 +389,7 @@ as listed by self.classes_

                -classmethod load(path: str, loader_func: callable, classes=None, **loader_kwargs)[source]
                +classmethod load(path: str, loader_func: callable, classes=None, **loader_kwargs)[source]

Loads a labelled set of data and converts it into a LabelledCollection instance. The function in charge of reading the instances must be specified. This function can be a custom one, or any of the reading functions defined in quapy.data.reader module.

                @@ -411,7 +412,7 @@ these arguments are used to call loader_func(path, **loader_kwargs)
                -property n_classes
                +property n_classes

                The number of classes

                Returns:
                @@ -422,7 +423,7 @@ these arguments are used to call loader_func(path, **loader_kwargs)
                -property p
                +property p

                An alias to self.prevalence()

                Returns:
                @@ -433,7 +434,7 @@ these arguments are used to call loader_func(path, **loader_kwargs)
                -prevalence()[source]
                +prevalence()[source]

                Returns the prevalence, or relative frequency, of the classes in the codeframe.

                Returns:
                @@ -445,7 +446,7 @@ as listed by self.classes_

                -sampling(size, *prevs, shuffle=True, random_state=None)[source]
                +sampling(size, *prevs, shuffle=True, random_state=None)[source]

                Return a random sample (an instance of LabelledCollection) of desired size and desired prevalence values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than the actual prevalence of the class, or with replacement otherwise.

                @@ -469,7 +470,7 @@ prevalence == prevs if the exact prevalence values can be met as pr
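The sampling API can be illustrated on synthetic binary data; note that for binary collections only the prevalence of the first class needs to be specified (see sampling_index() below):

```python
# Minimal sketch: drawing a sample of size 100 at ~0.7 prevalence for the
# first class of a synthetic binary collection.
import numpy as np
from quapy.data.base import LabelledCollection

X = np.random.rand(1000, 2)
y = np.random.randint(0, 2, size=1000)
collection = LabelledCollection(X, y)

sample = collection.sampling(100, 0.7, random_state=0)
print(sample.prevalence())  # approximately [0.7, 0.3]
```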
                -sampling_from_index(index)[source]
                +sampling_from_index(index)[source]

                Returns an instance of LabelledCollection whose elements are sampled from this collection using the index.

                @@ -484,7 +485,7 @@ index.

                -sampling_index(size, *prevs, shuffle=True, random_state=None)[source]
                +sampling_index(size, *prevs, shuffle=True, random_state=None)[source]

                Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the prevalence values are not specified, then returns the index of a uniform sampling. For each class, the sampling is drawn with replacement if the requested prevalence is larger than @@ -508,7 +509,7 @@ it is constrained. E.g., for binary collections, only the prevalence p

                -split_random(train_prop=0.6, random_state=None)[source]
                +split_random(train_prop=0.6, random_state=None)[source]

                Returns two instances of LabelledCollection split randomly from this collection, at desired proportion.

                @@ -529,7 +530,7 @@ second one with 1-train_prop elements

                -split_stratified(train_prop=0.6, random_state=None)[source]
                +split_stratified(train_prop=0.6, random_state=None)[source]

                Returns two instances of LabelledCollection split with stratification from this collection, at desired proportion.

                @@ -550,7 +551,7 @@ second one with 1-train_prop elements

                -stats(show=True)[source]
                +stats(show=True)[source]

                Returns (and eventually prints) a dictionary with some stats of this collection. E.g.,:

                >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)
                 >>> data.training.stats()
                @@ -572,7 +573,7 @@ values for each class)

                -uniform_sampling(size, random_state=None)[source]
                +uniform_sampling(size, random_state=None)[source]

                Returns a uniform sample (an instance of LabelledCollection) of desired size. The sampling is drawn with replacement if the requested size is greater than the number of instances, or without replacement otherwise.

                @@ -591,7 +592,7 @@ otherwise.

                -uniform_sampling_index(size, random_state=None)[source]
                +uniform_sampling_index(size, random_state=None)[source]

                Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn with replacement if the requested size is greater than the number of instances, or without replacement otherwise.

                @@ -610,7 +611,7 @@ otherwise.

                -property y
                +property y

                An alias to self.labels

                Returns:
                @@ -623,10 +624,10 @@ otherwise.

                -

                quapy.data.datasets module

                +

                quapy.data.datasets module

                -quapy.data.datasets.fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None)[source]
                +quapy.data.datasets.fetch_IFCB(single_sample_train=True, for_model_selection=False, data_home=None)[source]

                Loads the IFCB dataset for quantification from Zenodo (for more information on this dataset, please follow the zenodo link). This dataset is based on the data available publicly at @@ -658,7 +659,7 @@ i.e., a sampling protocol that returns a series of samples labelled by prevalenc

                -quapy.data.datasets.fetch_UCIBinaryDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) Dataset[source]
                +quapy.data.datasets.fetch_UCIBinaryDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) Dataset[source]

                Loads a UCI dataset as an instance of quapy.data.base.Dataset, as used in Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. @@ -688,7 +689,7 @@ The list of valid dataset names can be accessed in quapy.data.datasets.UCI

                -quapy.data.datasets.fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=False) LabelledCollection[source]
                +quapy.data.datasets.fetch_UCIBinaryLabelledCollection(dataset_name, data_home=None, verbose=False) LabelledCollection[source]

                Loads a UCI collection as an instance of quapy.data.base.LabelledCollection, as used in Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017). Using ensembles for problems with characterizable changes in data distribution: A case study on quantification. @@ -725,7 +726,7 @@ This can be reproduced by using

                -quapy.data.datasets.fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) Dataset[source]
                +quapy.data.datasets.fetch_UCIMulticlassDataset(dataset_name, data_home=None, test_split=0.3, verbose=False) Dataset[source]

                Loads a UCI multiclass dataset as an instance of quapy.data.base.Dataset.

                The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria: - It has more than 1000 instances @@ -758,7 +759,7 @@ This can be reproduced by using

                -quapy.data.datasets.fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False) LabelledCollection[source]
                +quapy.data.datasets.fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, verbose=False) LabelledCollection[source]

                Loads a UCI multiclass collection as an instance of quapy.data.base.LabelledCollection.

                The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria: - It has more than 1000 instances @@ -791,7 +792,7 @@ This can be reproduced by using

                -quapy.data.datasets.fetch_lequa2022(task, data_home=None)[source]
                +quapy.data.datasets.fetch_lequa2022(task, data_home=None)[source]

                Loads the official datasets provided for the LeQua competition. In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead. @@ -822,7 +823,7 @@ that return a series of samples stored in a directory which are labelled by prev

                -quapy.data.datasets.fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) Dataset[source]
                +quapy.data.datasets.fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) Dataset[source]

                Loads a Reviews dataset as a Dataset instance, as used in Esuli, A., Moreo, A., and Sebastiani, F. “A recurrent neural network for sentiment quantification.” Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018.. @@ -848,7 +849,7 @@ faster subsequent invokations

                -quapy.data.datasets.fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) Dataset[source]
                +quapy.data.datasets.fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_home=None, pickle=False) Dataset[source]

                Loads a Twitter dataset as a quapy.data.base.Dataset instance, as used in: Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis. Social Network Analysis and Mining6(19), 1–22 (2016) @@ -879,15 +880,15 @@ faster subsequent invokations

                -quapy.data.datasets.warn(*args, **kwargs)[source]
                +quapy.data.datasets.warn(*args, **kwargs)[source]
                -

                quapy.data.preprocessing module

                +

                quapy.data.preprocessing module

                -class quapy.data.preprocessing.IndexTransformer(**kwargs)[source]
                +class quapy.data.preprocessing.IndexTransformer(**kwargs)[source]

                Bases: object

                This class implements a sklearn’s-style transformer that indexes text as numerical ids for the tokens it contains, and that would be generated by sklearn’s @@ -901,7 +902,7 @@ contains, and that would be generated by sklearn’s

                -add_word(word, id=None, nogaps=True)[source]
                +add_word(word, id=None, nogaps=True)[source]

                Adds a new token (regardless of whether it has been found in the text or not), with dedicated id. Useful to define special tokens for codifying unknown words, or padding tokens.

                @@ -922,7 +923,7 @@ precedent ids stored so far

                -fit(X)[source]
                +fit(X)[source]

                Fits the transformer, i.e., decides on the vocabulary, given a list of strings.

                Parameters:
                @@ -936,7 +937,7 @@ precedent ids stored so far

                -fit_transform(X, n_jobs=None)[source]
                +fit_transform(X, n_jobs=None)[source]

                Fits the transform on X and transforms it.

                Parameters:
                @@ -953,7 +954,7 @@ precedent ids stored so far

                -transform(X, n_jobs=None)[source]
                +transform(X, n_jobs=None)[source]

                Transforms the strings in X as lists of numerical ids

                Parameters:
                @@ -970,7 +971,7 @@ precedent ids stored so far

                -vocabulary_size()[source]
                +vocabulary_size()[source]

                Gets the length of the vocabulary according to which the document tokens have been indexed

                Returns:
                @@ -983,7 +984,7 @@ precedent ids stored so far
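A short sketch with toy sentences (the min_df keyword is assumed to be forwarded to the underlying sklearn vectorizer):

```python
# Minimal sketch: indexing toy documents as lists of numerical token ids.
from quapy.data.preprocessing import IndexTransformer

indexer = IndexTransformer(min_df=1)
indexed = indexer.fit_transform(['a first document', 'a second document'])
print(indexer.vocabulary_size())
```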

                -quapy.data.preprocessing.index(dataset: Dataset, min_df=5, inplace=False, **kwargs)[source]
                +quapy.data.preprocessing.index(dataset: Dataset, min_df=5, inplace=False, **kwargs)[source]

                Indexes the tokens of a textual quapy.data.base.Dataset of string documents. To index a document means to replace each different token by a unique numerical index. Rare words (i.e., words occurring less than min_df times) are replaced by a special token UNK

                @@ -1007,7 +1008,7 @@ are lists of str

                -quapy.data.preprocessing.reduce_columns(dataset: Dataset, min_df=5, inplace=False)[source]
                +quapy.data.preprocessing.reduce_columns(dataset: Dataset, min_df=5, inplace=False)[source]

                Reduces the dimensionality of the instances, represented as a csr_matrix (or any subtype of scipy.sparse.spmatrix), of training and test documents by removing the columns of words which are not present in at least min_df instances in the training set

                @@ -1030,7 +1031,7 @@ in the training set have been removed

                -quapy.data.preprocessing.standardize(dataset: Dataset, inplace=False)[source]
                +quapy.data.preprocessing.standardize(dataset: Dataset, inplace=False)[source]

                Standardizes the real-valued columns of a quapy.data.base.Dataset. Standardization, aka z-scoring, of a variable X comes down to subtracting the average and normalizing by the standard deviation.

                @@ -1050,7 +1051,7 @@ standard deviation.

                -quapy.data.preprocessing.text2tfidf(dataset: Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs)[source]
                +quapy.data.preprocessing.text2tfidf(dataset: Dataset, min_df=3, sublinear_tf=True, inplace=False, **kwargs)[source]

                Transforms a quapy.data.base.Dataset of textual instances into a quapy.data.base.Dataset of tfidf weighted sparse vectors

                @@ -1074,10 +1075,10 @@ current Dataset (if inplace=True) where the instances are stored in a csr_
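A one-liner sketch of the transformation (the 'kindle' reviews dataset is used purely for illustration):

```python
# Minimal sketch: converting a raw-text Dataset into tf-idf vectors.
import quapy as qp
from quapy.data.preprocessing import text2tfidf

dataset = qp.datasets.fetch_reviews('kindle')
dataset_tfidf = text2tfidf(dataset, min_df=5)  # a new Dataset; pass inplace=True to modify in place
```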
                -

                quapy.data.reader module

                +

                quapy.data.reader module

                -quapy.data.reader.binarize(y, pos_class)[source]
                +quapy.data.reader.binarize(y, pos_class)[source]

                Binarizes a categorical array-like collection of labels towards the positive class pos_class. E.g.,:

                >>> binarize([1, 2, 3, 1, 1, 0], pos_class=2)
                 >>> array([0, 1, 0, 0, 0, 0])
                @@ -1099,7 +1100,7 @@ current Dataset (if inplace=True) where the instances are stored in a csr_
                 
                 
                -quapy.data.reader.from_csv(path, encoding='utf-8')[source]
                +quapy.data.reader.from_csv(path, encoding='utf-8')[source]

                Reads a csv file in which columns are separated by ‘,’. File format <label>,<feat1>,<feat2>,…,<featn>

                @@ -1117,7 +1118,7 @@ File format <label>,<feat1>,<feat2>,…,<featn>

                -quapy.data.reader.from_sparse(path)[source]
                +quapy.data.reader.from_sparse(path)[source]

                Reads a labelled collection of real-valued instances expressed in sparse format File format <-1 or 0 or 1>[s col(int):val(float)]

                @@ -1132,7 +1133,7 @@ File format <-1 or 0 or 1>[s col(int):val(float)]

                -quapy.data.reader.from_text(path, encoding='utf-8', verbose=1, class2int=True)[source]
                +quapy.data.reader.from_text(path, encoding='utf-8', verbose=1, class2int=True)[source]

Reads a labelled collection of documents. File format <0 or 1> <document>

                @@ -1151,7 +1152,7 @@ File fomart <0 or 1> <document>

                -quapy.data.reader.reindex_labels(y)[source]
                +quapy.data.reader.reindex_labels(y)[source]

                Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes. E.g.:

                >>> reindex_labels(['B', 'B', 'A', 'C'])
                @@ -1170,7 +1171,7 @@ E.g.:

                -

                Module contents

                +

                Module contents

diff --git a/docs/build/html/quapy.html b/docs/build/html/quapy.html index cfe4d60..249ce67 100644
--- a/docs/build/html/quapy.html
+++ b/docs/build/html/quapy.html
@@ -1,23 +1,24 @@
-quapy package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+quapy package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
(stylesheet and script references in the HTML head updated)
@@ -100,15 +101,19 @@
              • argmin_prevalence()
              • as_binary_prevalence()
              • check_prevalence_vector()
              • +
              • clip_prevalence()
              • +
              • counts_from_labels()
              • get_divergence()
              • get_nprevpoints_approximation()
              • linear_search()
              • +
              • map_onto_probability_simplex()
              • normalize_prevalence()
              • num_prevalence_combinations()
              • optim_minimize()
              • prevalence_from_labels()
              • prevalence_from_probabilities()
              • prevalence_linspace()
              • +
              • solve_adjustment()
              • strprev()
              • uniform_prevalence_sampling()
              • uniform_simplex_sampling()
              • @@ -154,6 +159,7 @@
              • get_quapy_home()
              • map_parallel()
              • parallel()
              • +
              • parallel_unpack()
              • pickled_resource()
              • save_text_file()
              • temp_seed()
              • @@ -193,9 +199,9 @@
                -

                quapy package

                +

                quapy package

                -

                Subpackages

                +

                Subpackages

                -

                Submodules

                +

                Submodules

                -

                quapy.error module

                +

                quapy.error module

                Implementation of error measures used for quantification

                -quapy.error.absolute_error(prevs, prevs_hat)
                +quapy.error.absolute_error(prevs, prevs_hat)
                Computes the absolute error between the two prevalence vectors.

                Absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\), @@ -637,7 +664,7 @@ where \(\mathcal{Y}\) are the

                -quapy.error.acc_error(y_true, y_pred)
                +quapy.error.acc_error(y_true, y_pred)

                Computes the error in terms of 1-accuracy. The accuracy is computed as \(\frac{tp+tn}{tp+fp+fn+tn}\), with tp, fp, fn, and tn standing for true positives, false positives, false negatives, and true negatives, @@ -657,7 +684,7 @@ respectively

                -quapy.error.acce(y_true, y_pred)[source]
                +quapy.error.acce(y_true, y_pred)[source]

                Computes the error in terms of 1-accuracy. The accuracy is computed as \(\frac{tp+tn}{tp+fp+fn+tn}\), with tp, fp, fn, and tn standing for true positives, false positives, false negatives, and true negatives, @@ -677,7 +704,7 @@ respectively

                -quapy.error.ae(prevs, prevs_hat)[source]
                +quapy.error.ae(prevs, prevs_hat)[source]
                Computes the absolute error between the two prevalence vectors.

                Absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(AE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}|\hat{p}(y)-p(y)|\), @@ -699,7 +726,7 @@ where \(\mathcal{Y}\) are the

                -quapy.error.f1_error(y_true, y_pred)
                +quapy.error.f1_error(y_true, y_pred)

                F1 error: simply computes the error in terms of macro \(F_1\), i.e., \(1-F_1^M\), where \(F_1\) is the harmonic mean of precision and recall, defined as \(\frac{2tp}{2tp+fp+fn}\), with tp, fp, and fn standing @@ -721,7 +748,7 @@ and then averaged.

                -quapy.error.f1e(y_true, y_pred)[source]
                +quapy.error.f1e(y_true, y_pred)[source]

                F1 error: simply computes the error in terms of macro \(F_1\), i.e., \(1-F_1^M\), where \(F_1\) is the harmonic mean of precision and recall, defined as \(\frac{2tp}{2tp+fp+fn}\), with tp, fp, and fn standing @@ -743,7 +770,7 @@ and then averaged.

                -quapy.error.from_name(err_name)[source]
                +quapy.error.from_name(err_name)[source]

                Gets an error function from its name. E.g., from_name(“mae”) will return function quapy.error.mae()

                @@ -758,7 +785,7 @@ will return function
                -quapy.error.kld(prevs, prevs_hat, eps=None)[source]
                +quapy.error.kld(prevs, prevs_hat, eps=None)[source]
                Computes the Kullback-Leibler divergence between the two prevalence distributions.

                Kullback-Leibler divergence between two prevalence distributions \(p\) and \(\hat{p}\) is computed as @@ -787,7 +814,7 @@ If eps=None, the sample size will be taken from the environment var

                -quapy.error.mae(prevs, prevs_hat)[source]
                +quapy.error.mae(prevs, prevs_hat)[source]

                Computes the mean absolute error (see quapy.error.ae()) across the sample pairs.

                Parameters:
                @@ -805,7 +832,7 @@ prevalence values

                -quapy.error.mean_absolute_error(prevs, prevs_hat)
                +quapy.error.mean_absolute_error(prevs, prevs_hat)

                Computes the mean absolute error (see quapy.error.ae()) across the sample pairs.

                Parameters:
                @@ -823,7 +850,7 @@ prevalence values

                -quapy.error.mean_normalized_absolute_error(prevs, prevs_hat)
                +quapy.error.mean_normalized_absolute_error(prevs, prevs_hat)

                Computes the mean normalized absolute error (see quapy.error.nae()) across the sample pairs.

                Parameters:
                @@ -841,7 +868,7 @@ prevalence values

                -quapy.error.mean_normalized_relative_absolute_error(prevs, prevs_hat, eps=None)
                +quapy.error.mean_normalized_relative_absolute_error(prevs, prevs_hat, eps=None)

                Computes the mean normalized relative absolute error (see quapy.error.nrae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -866,7 +893,7 @@ the environment variable SAMPLE_SIZE (which has thus to be set befo
                -quapy.error.mean_relative_absolute_error(prevs, prevs_hat, eps=None)
                +quapy.error.mean_relative_absolute_error(prevs, prevs_hat, eps=None)

                Computes the mean relative absolute error (see quapy.error.rae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -891,7 +918,7 @@ the environment variable SAMPLE_SIZE (which has thus to be set befo
                -quapy.error.mkld(prevs, prevs_hat, eps=None)[source]
                +quapy.error.mkld(prevs, prevs_hat, eps=None)[source]

                Computes the mean Kullback-Leibler divergence (see quapy.error.kld()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -916,7 +943,7 @@ If eps=None, the sample size will be taken from the environment var
                -quapy.error.mnae(prevs, prevs_hat)[source]
                +quapy.error.mnae(prevs, prevs_hat)[source]

                Computes the mean normalized absolute error (see quapy.error.nae()) across the sample pairs.

                Parameters:
                @@ -934,7 +961,7 @@ prevalence values

                -quapy.error.mnkld(prevs, prevs_hat, eps=None)[source]
                +quapy.error.mnkld(prevs, prevs_hat, eps=None)[source]

                Computes the mean Normalized Kullback-Leibler divergence (see quapy.error.nkld()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -958,7 +985,7 @@ If eps=None, the sample size will be taken from the environment var
                -quapy.error.mnrae(prevs, prevs_hat, eps=None)[source]
                +quapy.error.mnrae(prevs, prevs_hat, eps=None)[source]

                Computes the mean normalized relative absolute error (see quapy.error.nrae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -983,7 +1010,7 @@ the environment variable SAMPLE_SIZE (which has thus to be set befo
                -quapy.error.mrae(prevs, prevs_hat, eps=None)[source]
                +quapy.error.mrae(prevs, prevs_hat, eps=None)[source]

                Computes the mean relative absolute error (see quapy.error.rae()) across the sample pairs. The distributions are smoothed using the eps factor (see quapy.error.smooth()).

                @@ -1008,7 +1035,7 @@ the environment variable SAMPLE_SIZE (which has thus to be set befo
                -quapy.error.mse(prevs, prevs_hat)[source]
                +quapy.error.mse(prevs, prevs_hat)[source]

                Computes the mean squared error (see quapy.error.se()) across the sample pairs.

                Parameters:
                @@ -1027,7 +1054,7 @@ predicted prevalence values

                -quapy.error.nae(prevs, prevs_hat)[source]
                +quapy.error.nae(prevs, prevs_hat)[source]
                Computes the normalized absolute error between the two prevalence vectors.

                Normalized absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NAE(p,\hat{p})=\frac{AE(p,\hat{p})}{z_{AE}}\), @@ -1050,7 +1077,7 @@ are the classes of interest.

                -quapy.error.nkld(prevs, prevs_hat, eps=None)[source]
                +quapy.error.nkld(prevs, prevs_hat, eps=None)[source]
                Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.

                Normalized Kullback-Leibler divergence between two prevalence distributions \(p\) and \(\hat{p}\) is computed as @@ -1079,7 +1106,7 @@ size. If eps=None, the sample size will be taken from the environme

                -quapy.error.normalized_absolute_error(prevs, prevs_hat)
                +quapy.error.normalized_absolute_error(prevs, prevs_hat)
                Computes the normalized absolute error between the two prevalence vectors.

                Normalized absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(NAE(p,\hat{p})=\frac{AE(p,\hat{p})}{z_{AE}}\), @@ -1102,7 +1129,7 @@ are the classes of interest.

                -quapy.error.normalized_relative_absolute_error(prevs, prevs_hat, eps=None)
                +quapy.error.normalized_relative_absolute_error(prevs, prevs_hat, eps=None)
                Computes the normalized absolute relative error between the two prevalence vectors.

                Relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as @@ -1132,7 +1159,7 @@ sample size. If eps=None, the sample size will be taken from the en

                -quapy.error.nrae(prevs, prevs_hat, eps=None)[source]
                +quapy.error.nrae(prevs, prevs_hat, eps=None)[source]
                Computes the normalized absolute relative error between the two prevalence vectors.

                Relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as @@ -1162,7 +1189,7 @@ sample size. If eps=None, the sample size will be taken from the en

                -quapy.error.rae(prevs, prevs_hat, eps=None)[source]
                +quapy.error.rae(prevs, prevs_hat, eps=None)[source]
                Computes the absolute relative error between the two prevalence vectors.

                Relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as @@ -1191,7 +1218,7 @@ sample size. If eps=None, the sample size will be taken from the en

                -quapy.error.relative_absolute_error(prevs, prevs_hat, eps=None)
                +quapy.error.relative_absolute_error(prevs, prevs_hat, eps=None)
                Computes the absolute relative error between the two prevalence vectors.

                Relative absolute error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as @@ -1220,7 +1247,7 @@ sample size. If eps=None, the sample size will be taken from the en

                -quapy.error.se(prevs, prevs_hat)[source]
                +quapy.error.se(prevs, prevs_hat)[source]
                Computes the squared error between the two prevalence vectors.

Squared error between two prevalence vectors \(p\) and \(\hat{p}\) is computed as \(SE(p,\hat{p})=\frac{1}{|\mathcal{Y}|}\sum_{y\in \mathcal{Y}}(\hat{p}(y)-p(y))^2\),
@@ -1243,7 +1270,7 @@ where

                -quapy.error.smooth(prevs, eps)[source]
                +quapy.error.smooth(prevs, eps)[source]

                Smooths a prevalence distribution with \(\epsilon\) (eps) as: \(\underline{p}(y)=\frac{\epsilon+p(y)}{\epsilon|\mathcal{Y}|+ \displaystyle\sum_{y\in \mathcal{Y}}p(y)}\)
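For instance, a brief doctest-style sketch (the values follow directly from the formula above; eps=0.1 and the two-class vector are illustrative):

>>> import numpy as np
>>> from quapy.error import smooth
>>> smooth(np.asarray([0., 1.]), eps=0.1)  # (0.1+0)/1.2 and (0.1+1)/1.2
>>> # array([0.08333333, 0.91666667])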

                @@ -1262,10 +1289,10 @@ where
                -

                quapy.evaluation module

                +

                quapy.evaluation module

                -quapy.evaluation.evaluate(model: BaseQuantifier, protocol: AbstractProtocol, error_metric: str | Callable, aggr_speedup: str | bool = 'auto', verbose=False)[source]
                +quapy.evaluation.evaluate(model: BaseQuantifier, protocol: AbstractProtocol, error_metric: Union[str, Callable], aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

                Evaluates a quantification model according to a specific sample generation protocol and in terms of one evaluation metric (error).
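A hedged usage sketch (APP and ‘mae’ are arbitrary choices; model is assumed to be an already fitted quantifier and test a LabelledCollection):

>>> from quapy.protocol import APP
>>> from quapy.evaluation import evaluate
>>> protocol = APP(test, sample_size=100)
>>> score = evaluate(model, protocol=protocol, error_metric='mae')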

                @@ -1294,7 +1321,7 @@ a single float

                -quapy.evaluation.evaluate_on_samples(model: BaseQuantifier, samples: Iterable[LabelledCollection], error_metric: str | Callable, verbose=False)[source]
                +quapy.evaluation.evaluate_on_samples(model: BaseQuantifier, samples: Iterable[LabelledCollection], error_metric: Union[str, Callable], verbose=False)[source]

                Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error).

                Parameters:
                @@ -1316,7 +1343,7 @@ a single float

                -quapy.evaluation.evaluation_report(model: BaseQuantifier, protocol: AbstractProtocol, error_metrics: Iterable[str | Callable] = 'mae', aggr_speedup: str | bool = 'auto', verbose=False)[source]
                +quapy.evaluation.evaluation_report(model: BaseQuantifier, protocol: AbstractProtocol, error_metrics: Iterable[Union[str, Callable]] = 'mae', aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

                Generates a report (a pandas’ DataFrame) containing information of the evaluation of the model as according to a specific protocol and in terms of one or more evaluation metrics (errors).

                @@ -1346,7 +1373,7 @@ have been indicated, each displaying the score in terms of that metric for every
                -quapy.evaluation.prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup: str | bool = 'auto', verbose=False)[source]
                +quapy.evaluation.prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup: Union[str, bool] = 'auto', verbose=False)[source]

Uses a quantification model to generate predictions for the samples generated via a specific protocol. This function is central to all evaluation processes, and is endowed with an optimization to speed-up the prediction of protocols that generate samples from a large collection. The optimization applies to aggregative
@@ -1379,10 +1406,10 @@ convenient or not. Set to False to deactivate.

                -

                quapy.functional module

                +

                quapy.functional module

                -quapy.functional.HellingerDistance(P, Q) float[source]
                +quapy.functional.HellingerDistance(P: ndarray, Q: ndarray) float[source]

Computes the Hellinger Distance (HD) between (discretized) distributions P and Q. The HD for two discrete distributions of k bins is defined as:

                @@ -1402,7 +1429,7 @@ The HD for two discrete distributions of k bins is defined as:
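An illustrative call (the arrays stand for two discretized distributions over the same bins; the result is a non-negative float, 0 when P equals Q):

>>> import numpy as np
>>> from quapy.functional import HellingerDistance
>>> P = np.asarray([0.5, 0.3, 0.2])
>>> Q = np.asarray([0.4, 0.4, 0.2])
>>> hd = HellingerDistance(P, Q)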

                -quapy.functional.TopsoeDistance(P, Q, epsilon=1e-20)[source]
                +quapy.functional.TopsoeDistance(P: ndarray, Q: ndarray, epsilon: float = 1e-20)[source]

                Topsoe distance between two (discretized) distributions P and Q. The Topsoe distance for two discrete distributions of k bins is defined as:

                @@ -1423,7 +1450,7 @@ The Topsoe distance for two discrete distributions of k bins is def
                -quapy.functional.adjusted_quantification(prevalence_estim, tpr, fpr, clip=True)[source]
                +quapy.functional.adjusted_quantification(prevalence_estim: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], tpr: float, fpr: float, clip: bool = True)[source]

                Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the positive class p comes down to computing:

                @@ -1445,12 +1472,28 @@ positive class p comes down to computing:

-quapy.functional.argmin_prevalence(loss, n_classes, method='optim_minimize')[source]
+quapy.functional.argmin_prevalence(loss: Callable, n_classes: int, method: Literal['optim_minimize', 'linear_search', 'ternary_search'] = 'optim_minimize')[source]

Searches for the prevalence vector that minimizes a loss function.

Parameters:

• loss – callable, the function to minimize

• n_classes – int, number of classes

• method – string indicating the search strategy. Possible values are:

  • ‘optim_minimize’: uses scipy.optimize

  • ‘linear_search’: carries out a linear search for binary problems in the space [0, 0.01, 0.02, …, 1]

  • ‘ternary_search’: ternary search (not yet implemented)

Returns:

np.ndarray, a prevalence vector
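A hedged doctest (the L2 loss toward a fixed target is purely illustrative):

>>> import numpy as np
>>> from quapy.functional import argmin_prevalence
>>> target = np.asarray([0.2, 0.8])
>>> loss = lambda prev: np.linalg.norm(prev - target)
>>> argmin_prevalence(loss, n_classes=2)
>>> # ≈ array([0.2, 0.8])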
                -quapy.functional.as_binary_prevalence(positive_prevalence: float | ndarray, clip_if_necessary=False)[source]
                +quapy.functional.as_binary_prevalence(positive_prevalence: Union[float, ndarray], clip_if_necessary: bool = False)[source]

                Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution.
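Illustrative doctest (assuming the usual convention that index 1 holds the positive class):

>>> from quapy.functional import as_binary_prevalence
>>> as_binary_prevalence(0.25)
>>> # array([0.75, 0.25])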

                @@ -1469,11 +1512,11 @@ is valid. If False, it then checks that the value is in the valid range, and rai
                -quapy.functional.check_prevalence_vector(p, raise_exception=False, toleranze=1e-08)[source]
                +quapy.functional.check_prevalence_vector(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], raise_exception: bool = False, toleranze: float = 1e-08)[source]

                Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.

Parameters:

-p – the prevalence vector to check
+prevalences – the prevalence vector to check

Returns:

True if p is valid, False otherwise
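Illustrative doctest:

>>> import numpy as np
>>> from quapy.functional import check_prevalence_vector
>>> check_prevalence_vector(np.asarray([0.5, 0.5]))   # True
>>> check_prevalence_vector(np.asarray([0.5, 0.75]))  # False (does not sum up to 1)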

                @@ -1481,14 +1524,64 @@ is valid. If False, it then checks that the value is in the valid range, and rai
+quapy.functional.clip_prevalence(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], method: Literal[None, 'none', 'clip', 'project']) ndarray[source]

Clips the proportions vector prevalences so that it is a valid probability distribution, i.e., all values are in [0,1] and sum up to 1.

Parameters:

• prevalences – array-like, the proportions vector to be clipped, shape (n_classes,)

• method – indicates the method to be used for normalization. If None or “none”, no normalization is performed. If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1. If “project”, the values are projected onto the probability simplex.

Returns:

the normalized prevalence vector, shape (n_classes,)
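A hedged sketch of the normalization modes (the input vector is illustrative):

>>> import numpy as np
>>> from quapy.functional import clip_prevalence
>>> prevs = np.asarray([-0.1, 1.1])
>>> clip_prevalence(prevs, method='clip')     # clip to [0,1], then re-normalize
>>> clip_prevalence(prevs, method='project')  # Euclidean projection onto the simplex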
+quapy.functional.counts_from_labels(labels: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], classes: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]])[source]

Computes the count values from a vector of labels.

Parameters:

• labels – array-like of shape (n_instances,) with the label for each instance

• classes – the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples.

Returns:

an ndarray of shape (len(classes),) with the occurrence counts of each class
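Illustrative doctest (note that class 2 has no examples and still gets a zero count):

>>> import numpy as np
>>> from quapy.functional import counts_from_labels
>>> counts_from_labels(np.asarray([0, 0, 1]), classes=[0, 1, 2])
>>> # array([2, 1, 0])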
-quapy.functional.get_divergence(divergence: str | Callable)[source]
+quapy.functional.get_divergence(divergence: Union[str, Callable])[source]

Guarantees that the divergence received as argument is a function. That is, if this argument is already a callable, then it is returned; if it is instead a string, then the corresponding divergence is instantiated from the string name.

Parameters:

divergence – callable or string indicating the name of the divergence function

Returns:

callable
                -quapy.functional.get_nprevpoints_approximation(combinations_budget: int, n_classes: int, n_repeats: int = 1)[source]
                +quapy.functional.get_nprevpoints_approximation(combinations_budget: int, n_classes: int, n_repeats: int = 1)[source]

Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that the number of valid prevalence values generated as combinations of prevalence points (points in an n_classes-dimensional simplex) does not exceed combinations_budget.

                @@ -1508,7 +1601,7 @@ that the number of valid prevalence values generated as combinations of prevalen
                +quapy.functional.linear_search(loss: Callable, n_classes: int)[source]

Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring the range [0,1] stepping by 0.01. This search is inefficient, and is added only for completeness (some of the early methods in quantification literature used it, e.g., HDy). A more powerful alternative is optim_minimize.

                @@ -1525,9 +1618,29 @@ early methods in quantification literature used it, e.g., HDy). A most powerful
+quapy.functional.map_onto_probability_simplex(unnormalized_arr: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]

Projects a point onto the probability simplex.

The code is adapted from Mathieu Blondel’s BSD-licensed implementation, which accompanies the paper

Mathieu Blondel, Akinori Fujino, and Naonori Ueda. Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, ICPR 2014, URL

Parameters:

unnormalized_arr – point in n-dimensional space, shape (n,)

Returns:

projection of v onto the (n-1)-dimensional probability simplex, shape (n,)
                -quapy.functional.normalize_prevalence(prevalences)[source]
                +quapy.functional.normalize_prevalence(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]])[source]

Normalize a vector or matrix of prevalence values. The normalization consists of applying an L1 normalization in cases in which the prevalence values are not all-zeros, and of converting the prevalence values into 1/n_classes in cases in which all values are zero.

                @@ -1543,7 +1656,7 @@ cases in which all values are zero.

                -quapy.functional.num_prevalence_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1)[source]
                +quapy.functional.num_prevalence_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1)[source]

                Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if n_prevpoints equally distant prevalence values are generated and n_repeats repetitions are requested. The computation comes down to calculating:

                @@ -1569,7 +1682,7 @@ number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0
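For instance, consistently with the 5-combination example mentioned above for the binary case:

>>> from quapy.functional import num_prevalence_combinations
>>> num_prevalence_combinations(n_prevpoints=5, n_classes=2, n_repeats=1)
>>> # 5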
                -quapy.functional.optim_minimize(loss, n_classes)[source]
                +quapy.functional.optim_minimize(loss: Callable, n_classes: int)[source]

Searches for the optimal prevalence values, i.e., an n_classes-dimensional vector of the (n_classes-1)-simplex that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy’s SLSQP routine.

                @@ -1588,12 +1701,12 @@ SLSQP routine.

-quapy.functional.prevalence_from_labels(labels, classes)[source]
+quapy.functional.prevalence_from_labels(labels: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], classes: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]])[source]

-Computed the prevalence values from a vector of labels.
+Computes the prevalence values from a vector of labels.

Parameters:

-• labels – array-like of shape (n_instances) with the label for each instance
+• labels – array-like of shape (n_instances,) with the label for each instance

• classes – the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples.

                @@ -1606,7 +1719,7 @@ some classes have no examples.

                -quapy.functional.prevalence_from_probabilities(posteriors, binarize: bool = False)[source]
                +quapy.functional.prevalence_from_probabilities(posteriors: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], binarize: bool = False)[source]

                Returns a vector of prevalence values from a matrix of posterior probabilities.

                Parameters:
                @@ -1624,7 +1737,7 @@ converting the vectors of posterior probabilities into class indices, by taking
                -quapy.functional.prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01)[source]
                +quapy.functional.prevalence_linspace(grid_points: int = 21, repeats: int = 1, smooth_limits_epsilon: float = 0.01)[source]

Produces an array of uniformly separated values of prevalence. By default, produces an array of 21 prevalence values, with step 0.05 and with the limits smoothed, i.e.:
@@ -1632,7 +1745,7 @@ step 0.05 and with the limits smoothed, i.e.:

                Parameters:
-• n_prevalences – the number of prevalence values to sample from the [0,1] interval (default 21)
+• grid_points – the number of prevalence values to sample from the [0,1] interval (default 21)

                • repeats – number of times each prevalence is to be repeated (defaults to 1)

                • smooth_limits_epsilon – the quantity to add and subtract to the limits 0 and 1
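Illustrative doctest (a 5-point grid; the limit values 0 and 1 are replaced by 0.01 and 0.99, as per smooth_limits_epsilon):

>>> from quapy.functional import prevalence_linspace
>>> prevalence_linspace(grid_points=5, repeats=1, smooth_limits_epsilon=0.01)
>>> # array([0.01, 0.25, 0.5 , 0.75, 0.99])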

                @@ -1643,9 +1756,41 @@ step 0.05 and with the limits smoothed, i.e.:
+quapy.functional.solve_adjustment(p_c_cond_y: ndarray, p_c: ndarray, method: Literal['inversion', 'invariant-ratio'], solver: Literal['exact', 'minimize', 'exact-raise', 'exact-cc']) ndarray[source]

Tries to solve the equation \(P(C)=P(C|Y)P(Y)\), where \(P(C)\) is the vector of prevalence values obtained by classify and count, and \(P(C|Y)\) are the class-conditional misclassification rates of the classifier.

Parameters:

• p_c_cond_y – array of shape (n_classes, n_classes) with entry (c,y) being the estimate of \(P(C=c|Y=y)\), that is, the probability that an instance that belongs to class \(y\) ends up being classified as belonging to class \(c\)

• p_c – array of shape (n_classes,) containing the prevalence values as estimated by classify and count

• method (str) – indicates the adjustment method to be used. Valid options are:

  • ’inversion’: tries to solve the equation \(P(C)=P(C|Y)P(Y)\) as \(P(Y) = P(C|Y)^{-1} P(C)\), where \(P(C|Y)^{-1}\) is the matrix inverse of \(P(C|Y)\). This inverse may not exist in degenerate cases.

  • ’invariant-ratio’: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.

• solver (str) – the method to use for solving the system of linear equations. Valid options are:

  • ’exact-raise’: tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than n_classes.

  • ’exact-cc’: if the matrix is not of full rank, returns p_c as the estimates, which corresponds to no adjustment (i.e., the classify and count method; see quapy.method.aggregative.CC).

  • ’exact’: deprecated, defaults to ‘exact-cc’.

  • ’minimize’: minimizes a loss, so the solution always exists.
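A hedged sketch with illustrative numbers (a mildly noisy binary misclassification matrix; with these values, the exact solution works out to a uniform prevalence):

>>> import numpy as np
>>> from quapy.functional import solve_adjustment
>>> p_c_cond_y = np.asarray([[0.9, 0.2],
>>>                          [0.1, 0.8]])  # entry (c,y) = P(C=c|Y=y)
>>> p_c = np.asarray([0.55, 0.45])         # prevalence estimated by classify and count
>>> solve_adjustment(p_c_cond_y, p_c, method='inversion', solver='exact-cc')
>>> # array([0.5, 0.5])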
                -quapy.functional.strprev(prevalences, prec=3)[source]
                +quapy.functional.strprev(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], prec: int = 3)[source]

                Returns a string representation for a prevalence vector. E.g.,

                >>> strprev([1/3, 2/3], prec=2)
                 >>> '[0.33, 0.67]'
                @@ -1666,7 +1811,7 @@ step 0.05 and with the limits smoothed, i.e.:
                 
                 
                -quapy.functional.uniform_prevalence_sampling(n_classes, size=1)[source]
                +quapy.functional.uniform_prevalence_sampling(n_classes: int, size: int = 1)[source]

Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post: https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex
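A hedged sketch (the returned array presumably has one row per requested sample, each summing to 1):

>>> from quapy.functional import uniform_prevalence_sampling
>>> sample = uniform_prevalence_sampling(n_classes=3, size=2)  # shape (2, 3)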

                @@ -1685,7 +1830,7 @@ for sampling uniformly at random from the unit simplex. This implementation is a
                -quapy.functional.uniform_simplex_sampling(n_classes, size=1)
                +quapy.functional.uniform_simplex_sampling(n_classes: int, size: int = 1)

Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post: https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex

                @@ -1704,26 +1849,26 @@ for sampling uniformly at random from the unit simplex. This implementation is a
                -

                quapy.model_selection module

                +

                quapy.model_selection module

                -class quapy.model_selection.ConfigStatus(params, status, msg='')[source]
                +class quapy.model_selection.ConfigStatus(params, status, msg='')[source]

                Bases: object

                -failed()[source]
                +failed()[source]
                -success()[source]
                +success()[source]
                -class quapy.model_selection.GridSearchQ(model: ~quapy.method.base.BaseQuantifier, param_grid: dict, protocol: ~quapy.protocol.AbstractProtocol, error: ~typing.Callable | str = <function mae>, refit=True, timeout=-1, n_jobs=None, raise_errors=False, verbose=False)[source]
                +class quapy.model_selection.GridSearchQ(model: ~quapy.method.base.BaseQuantifier, param_grid: dict, protocol: ~quapy.protocol.AbstractProtocol, error: ~typing.Union[~typing.Callable, str] = <function mae>, refit=True, timeout=-1, n_jobs=None, raise_errors=False, verbose=False)[source]

                Bases: BaseQuantifier

                Grid Search optimization targeting a quantification-oriented metric.

Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation
@@ -1750,7 +1895,7 @@ However, if no configuration yields a valid model, then a ValueError exception w
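A hedged usage sketch (PACC with logistic regression is an arbitrary choice; protocol and training are assumed to be an AbstractProtocol and a LabelledCollection defined elsewhere):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import PACC
>>> from quapy.model_selection import GridSearchQ
>>> search = GridSearchQ(
>>>     model=PACC(LogisticRegression()),
>>>     param_grid={'classifier__C': [0.1, 1, 10]},
>>>     protocol=protocol,
>>>     error='mae')
>>> search.fit(training)
>>> best = search.best_model()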

                -best_model()[source]
                +best_model()[source]

                Returns the best model found after calling the fit() method, i.e., the one trained on the combination of hyper-parameters that minimized the error function.

                @@ -1762,7 +1907,7 @@ of hyper-parameters that minimized the error function.

                -fit(training: LabelledCollection)[source]
                +fit(training: LabelledCollection)[source]
Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing the error metric.

                @@ -1779,7 +1924,7 @@ of hyper-parameters that minimized the error function.

                -get_params(deep=True)[source]
                +get_params(deep=True)[source]

                Returns the dictionary of hyper-parameters to explore (param_grid)

                Parameters:
                @@ -1793,7 +1938,7 @@ of hyper-parameters that minimized the error function.

                -quantify(instances)[source]
                +quantify(instances)[source]

                Estimate class prevalence values using the best model found after calling the fit() method.

                Parameters:
                @@ -1808,7 +1953,7 @@ by the model selection process.

                -set_params(**parameters)[source]
                +set_params(**parameters)[source]

                Sets the hyper-parameters to explore.

                Parameters:
                @@ -1821,34 +1966,34 @@ by the model selection process.

                -class quapy.model_selection.Status(value)[source]
                +class quapy.model_selection.Status(value)[source]

                Bases: Enum

                An enumeration.

                -ERROR = 4
                +ERROR = 4
                -INVALID = 3
                +INVALID = 3
                -SUCCESS = 1
                +SUCCESS = 1
                -TIMEOUT = 2
                +TIMEOUT = 2
                -quapy.model_selection.cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0)[source]
                +quapy.model_selection.cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0)[source]

                Akin to scikit-learn’s cross_val_predict but for quantification.
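A hedged sketch (PCC is an arbitrary quantifier; data is assumed to be a LabelledCollection):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import PCC
>>> from quapy.model_selection import cross_val_predict
>>> predictions = cross_val_predict(PCC(LogisticRegression()), data, nfolds=3)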

                @@ -1868,7 +2013,7 @@ but for quantification.

                -quapy.model_selection.expand_grid(param_grid: dict)[source]
                +quapy.model_selection.expand_grid(param_grid: dict)[source]

                Expands a param_grid dictionary as a list of configurations. Example:

                >>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]})
                @@ -1889,7 +2034,7 @@ to explore for that hyper-parameter

                -quapy.model_selection.group_params(param_grid: dict)[source]
                +quapy.model_selection.group_params(param_grid: dict)[source]

Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific hyper-parameters, and another for the quantifier-specific hyper-parameters

                @@ -1905,10 +2050,10 @@ to explore for that hyper-parameter

                -

                quapy.plot module

                +

                quapy.plot module

                -quapy.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=<matplotlib.colors.ListedColormap object>, vertical_xticks=False, legend=True, savepath=None)[source]
                +quapy.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=5, colormap=<matplotlib.colors.ListedColormap object>, vertical_xticks=False, legend=True, savepath=None)[source]

Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value) for different bins of (true) prevalence of the positive class, for each quantification method.

                @@ -1933,7 +2078,7 @@ for each experiment

                -quapy.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None)[source]
                +quapy.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None)[source]

                Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value) for each quantification method with respect to a given positive class.

                @@ -1954,7 +2099,7 @@ for each experiment

                -quapy.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, train_prev=None, savepath=None, method_order=None)[source]
                +quapy.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True, train_prev=None, savepath=None, method_order=None)[source]

The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the name). It is convenient for binary quantification problems, though it can be used for multiclass problems by
@@ -1985,7 +2130,7 @@ listed in the legend and associated with matplotlib colors).

                -quapy.plot.brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, binning='isomerous', x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005, method_order=None, savepath=None)[source]
                +quapy.plot.brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, binning='isomerous', x_error='ae', y_error='ae', ttest_alpha=0.005, tail_density_threshold=0.005, method_order=None, savepath=None)[source]

Displays (only) the top performing methods for different regions of the train-test shift in form of a broken bar chart, in which each method has bars only for those regions in which either of the following conditions holds: (i) it is the best method (in average) for the bin, or (ii) it is not statistically significantly different
@@ -2027,7 +2172,7 @@ listed in the legend and associated with matplotlib colors).

                -quapy.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, error_name='ae', show_std=False, show_density=True, show_legend=True, logscale=False, title='Quantification error as a function of distribution shift', vlines=None, method_order=None, savepath=None)[source]
                +quapy.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=20, error_name='ae', show_std=False, show_density=True, show_legend=True, logscale=False, title='Quantification error as a function of distribution shift', vlines=None, method_order=None, savepath=None)[source]

Plots the error (along the x-axis, as measured in terms of error_name) as a function of the train-test shift (along the y-axis, as measured in terms of quapy.error.ae()). This plot is useful especially for multiclass problems, in which “diagonal plots” may be cumbersome, and in order to gain understanding about how methods
@@ -2061,10 +2206,10 @@ listed in the legend and associated with matplotlib colors).

                -

                quapy.protocol module

                +

                quapy.protocol module

                -class quapy.protocol.APP(data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=0, sanity_check=10000, return_type='sample_prev')[source]
                +class quapy.protocol.APP(data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=0, sanity_check=10000, return_type='sample_prev')[source]

                Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

Implementation of the artificial prevalence protocol (APP). The APP consists of exploring a grid of prevalence values containing n_prevalences points (e.g.,
@@ -2093,7 +2238,7 @@ to “labelled_collection” to get instead instances of LabelledCollection
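A hedged usage sketch (sample_size=100 is an arbitrary choice; test is assumed to be a LabelledCollection):

>>> from quapy.protocol import APP
>>> prot = APP(test, sample_size=100, n_prevalences=21, repeats=10)
>>> for sample, prev in prot():
>>>     ...  # each sample is drawn at a controlled prevalence prev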

                -prevalence_grid()[source]
                +prevalence_grid()[source]

Generates vectors of prevalence values from an exhaustive grid of prevalence values. The number of prevalence values explored for each dimension depends on n_prevalences, so that, if, for example, n_prevalences=11 then the prevalence values of the grid are taken from [0, 0.1, 0.2, …, 0.9, 1]. Only
@@ -2113,7 +2258,7 @@ in the grid multiplied by repeat

                -sample(index)[source]
                +sample(index)[source]

                Realizes the sample given the index of the instances.

                Parameters:
                @@ -2127,7 +2272,7 @@ in the grid multiplied by repeat

                -samples_parameters()[source]
                +samples_parameters()[source]

Return all the necessary parameters to replicate the samples according to the APP protocol.

                Returns:
                @@ -2138,7 +2283,7 @@ in the grid multiplied by repeat

                -total()[source]
                +total()[source]

                Returns the number of samples that will be generated

                Returns:
                @@ -2151,12 +2296,12 @@ in the grid multiplied by repeat

                -class quapy.protocol.AbstractProtocol[source]
                +class quapy.protocol.AbstractProtocol[source]

                Bases: object

                Abstract parent class for sample generation protocols.

                -total()[source]
                +total()[source]

                Indicates the total number of samples that the protocol generates.

                Returns:
                @@ -2169,7 +2314,7 @@ in the grid multiplied by repeat

                -class quapy.protocol.AbstractStochasticSeededProtocol(random_state=0)[source]
                +class quapy.protocol.AbstractStochasticSeededProtocol(random_state=0)[source]

                Bases: AbstractProtocol

An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g., via random sampling), sequences of quapy.data.base.LabelledCollection samples.
@@ -2187,7 +2332,7 @@ the sequence will be consistent every time the protocol is called.

                -collator(sample, *args)[source]
                +collator(sample, *args)[source]

                The collator prepares the sample to accommodate the desired output format before returning the output. This collator simply returns the sample as it is. Classes inheriting from this abstract class can implement their custom collators.

                @@ -2206,12 +2351,12 @@ implement their custom collators.

                -property random_state
                +property random_state
                -abstract sample(params)[source]
                +abstract sample(params)[source]

                Extract one sample determined by the given parameters

                Parameters:
                @@ -2225,7 +2370,7 @@ implement their custom collators.

                -abstract samples_parameters()[source]
                +abstract samples_parameters()[source]

                This function has to return all the necessary parameters to replicate the samples

                Returns:
                @@ -2238,13 +2383,13 @@ implement their custom collators.

                -quapy.protocol.ArtificialPrevalenceProtocol
                +quapy.protocol.ArtificialPrevalenceProtocol

                alias of APP

                -class quapy.protocol.DomainMixer(domainA: LabelledCollection, domainB: LabelledCollection, sample_size, repeats=1, prevalence=None, mixture_points=11, random_state=0, return_type='sample_prev')[source]
                +class quapy.protocol.DomainMixer(domainA: LabelledCollection, domainB: LabelledCollection, sample_size, repeats=1, prevalence=None, mixture_points=11, random_state=0, return_type='sample_prev')[source]

                Bases: AbstractStochasticSeededProtocol

                Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.

                @@ -2268,7 +2413,7 @@ will be the same every time the protocol is called)

                -sample(indexes)[source]
                +sample(indexes)[source]

                Realizes the sample given a pair of indexes of the instances from A and B.

                Parameters:
                @@ -2282,7 +2427,7 @@ will be the same every time the protocol is called)

                -samples_parameters()[source]
                +samples_parameters()[source]

Return all the necessary parameters to replicate the samples according to this protocol.

                Returns:
                @@ -2293,7 +2438,7 @@ will be the same every time the protocol is called)

                -total()[source]
                +total()[source]

Returns the number of samples that will be generated (equal to “repeats * mixture_points”)

                Returns:
                @@ -2306,7 +2451,7 @@ will be the same every time the protocol is called)

                -class quapy.protocol.IterateProtocol(samples: [<class 'quapy.data.base.LabelledCollection'>])[source]
                +class quapy.protocol.IterateProtocol(samples: [<class 'quapy.data.base.LabelledCollection'>])[source]

                Bases: AbstractProtocol

                A very simple protocol which simply iterates over a list of previously generated samples

                @@ -2316,7 +2461,7 @@ will be the same every time the protocol is called)

                -total()[source]
                +total()[source]

                Returns the number of samples in this protocol

                Returns:
                @@ -2329,7 +2474,7 @@ will be the same every time the protocol is called)

                -class quapy.protocol.NPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')[source]
                +class quapy.protocol.NPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')[source]

                Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

                A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.

                @@ -2349,7 +2494,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -sample(index)[source]
                +sample(index)[source]

                Realizes the sample given the index of the instances.

                Parameters:
                @@ -2363,7 +2508,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -samples_parameters()[source]
                +samples_parameters()[source]

Return all the necessary parameters to replicate the samples according to the NPP protocol.

                Returns:
                @@ -2374,7 +2519,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -total()[source]
                +total()[source]

Returns the number of samples that will be generated (equal to “repeats”)

                Returns:
                @@ -2387,23 +2532,23 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -quapy.protocol.NaturalPrevalenceProtocol
                +quapy.protocol.NaturalPrevalenceProtocol

                alias of NPP

                -class quapy.protocol.OnLabelledCollectionProtocol[source]
                +class quapy.protocol.OnLabelledCollectionProtocol[source]

                Bases: object

                Protocols that generate samples from a qp.data.LabelledCollection object.

                -RETURN_TYPES = ['sample_prev', 'labelled_collection', 'index']
                +RETURN_TYPES = ['sample_prev', 'labelled_collection', 'index']
                -classmethod get_collator(return_type='sample_prev')[source]
                +classmethod get_collator(return_type='sample_prev')[source]

                Returns a collator function, i.e., a function that prepares the yielded data

                Parameters:
                @@ -2420,7 +2565,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -get_labelled_collection()[source]
                +get_labelled_collection()[source]

                Returns the labelled collection on which this protocol acts.

                Returns:
                @@ -2431,7 +2576,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -on_preclassified_instances(pre_classifications, in_place=False)[source]
                +on_preclassified_instances(pre_classifications, in_place=False)[source]

Returns a copy of this protocol that acts on a modified version of the original qp.data.LabelledCollection in which the original instances have been replaced with the outputs of a classifier for each instance. (This is convenient for speeding-up
@@ -2455,7 +2600,7 @@ with shape (n_instances,) when the classifier is a hard one, or wit

                -class quapy.protocol.UPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')[source]
                +class quapy.protocol.UPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')[source]

                Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

A variant of APP that, instead of using a grid of equidistant prevalence values, relies on the Kraemer algorithm for sampling the unit (k-1)-simplex uniformly at random, with
@@ -2479,7 +2624,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -sample(index)[source]
                +sample(index)[source]

                Realizes the sample given the index of the instances.

                Parameters:
                @@ -2493,7 +2638,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -samples_parameters()[source]
                +samples_parameters()[source]

Return all the necessary parameters to replicate the samples according to the UPP protocol.

                Returns:
                @@ -2504,7 +2649,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -total()[source]
                +total()[source]

Returns the number of samples that will be generated (equal to “repeats”)

                Returns:
                @@ -2517,16 +2662,16 @@ to “labelled_collection” to get instead instances of LabelledCollection

                -quapy.protocol.UniformPrevalenceProtocol
                +quapy.protocol.UniformPrevalenceProtocol

                alias of UPP

                -

                quapy.util module

                +

                quapy.util module

                -class quapy.util.EarlyStop(patience, lower_is_better=True)[source]
                +class quapy.util.EarlyStop(patience, lower_is_better=True)[source]

                Bases: object

                A class implementing the early-stopping condition typically used for training neural networks.

                >>> earlystop = EarlyStop(patience=2, lower_is_better=True)
                @@ -2563,7 +2708,7 @@ stopping condition. An instance of this class is callable, and is t
                 
                 
                -quapy.util.create_if_not_exist(path)[source]
                +quapy.util.create_if_not_exist(path)[source]

An alias to os.makedirs(path, exist_ok=True) that also returns the path. This is useful in cases such as:

                >>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))
                 
                @@ -2580,7 +2725,7 @@ stopping condition. An instance of this class is callable, and is t
                -quapy.util.create_parent_dir(path)[source]
                +quapy.util.create_parent_dir(path)[source]

Creates the parent dir (if any) of a given path, if it does not exist. E.g., for ./path/to/file.txt, the path ./path/to is created.

                @@ -2592,7 +2737,7 @@ is created.

                -quapy.util.download_file(url, archive_filename)[source]
                +quapy.util.download_file(url, archive_filename)[source]

                Downloads a file from a url

                Parameters:
                @@ -2606,7 +2751,7 @@ is created.

                -quapy.util.download_file_if_not_exists(url, archive_filename)[source]
                +quapy.util.download_file_if_not_exists(url, archive_filename)[source]

Downloads a file (using download_file()) if it does not yet exist.

                Parameters:
                @@ -2620,7 +2765,7 @@ is created.

                -quapy.util.get_quapy_home()[source]
                +quapy.util.get_quapy_home()[source]

Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets. This directory is ~/quapy_data

                @@ -2632,7 +2777,7 @@ This directory is ~/quapy_data

                -quapy.util.map_parallel(func, args, n_jobs)[source]
                +quapy.util.map_parallel(func, args, n_jobs)[source]

                Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function that already works with a list of arguments.
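A hedged sketch (the summation function is illustrative):

>>> from quapy.util import map_parallel
>>> def total(xs): return sum(xs)
>>> map_parallel(func=total, args=list(range(99)), n_jobs=2)  # total applied to two slices of args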

                @@ -2649,7 +2794,7 @@ that already works with a list of arguments.

                -quapy.util.parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky')[source]
                +quapy.util.parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky')[source]

                A wrapper of multiprocessing:

                >>> Parallel(n_jobs=n_jobs)(
                 >>>      delayed(func)(args_i) for args_i in args
                @@ -2666,6 +2811,31 @@ Seeds the child processes to ensure reproducibility when n_jobs>1.

              • seed – the numeric seed

              • asarray – set to True to return a np.ndarray instead of a list

              • backend – indicates the backend used for handling parallel works

• open_args – if True, then the delayed function is called on *args_i, instead of on args_i
+quapy.util.parallel_unpack(func, args, n_jobs, seed=None, asarray=True, backend='loky')[source]

A wrapper of multiprocessing:

>>> Parallel(n_jobs=n_jobs)(
>>>      delayed(func)(*args_i) for args_i in args
>>> )

that takes the quapy.environ variable as input silently. Seeds the child processes to ensure reproducibility when n_jobs>1.

Parameters:

• func – callable

• args – args of func

• seed – the numeric seed

• asarray – set to True to return a np.ndarray instead of a list

• backend – indicates the backend used for handling parallel works

              @@ -2673,7 +2843,7 @@ Seeds the child processes to ensure reproducibility when n_jobs>1.

              -quapy.util.pickled_resource(pickle_path: str, generation_func: callable, *args)[source]
              +quapy.util.pickled_resource(pickle_path: str, generation_func: callable, *args)[source]

Allows for fast reuse of resources that are generated only once by calling generation_func(*args). The next time this function is invoked, it loads the pickled resource. Example:

              >>> def some_array(n):  # a mock resource created with one parameter (`n`)
              @@ -2698,7 +2868,7 @@ this function is invoked, it loads the pickled resource. Example:

              -quapy.util.save_text_file(path, text)[source]
              +quapy.util.save_text_file(path, text)[source]

              Saves a text file to disk, given its full path, and creates the parent directory if missing.

              Parameters:
              @@ -2712,7 +2882,7 @@ this function is invoked, it loads the pickled resource. Example:

              -quapy.util.temp_seed(random_state)[source]
              +quapy.util.temp_seed(random_state)[source]

Can be used in a “with” context to set a temporary seed without modifying the outer numpy’s current state. E.g.:

              >>> with temp_seed(random_seed):
               >>>  pass # do any computation depending on np.random functionality
              @@ -2727,7 +2897,7 @@ this function is invoked, it loads the pickled resource. Example:

              -quapy.util.timeout(seconds)[source]
              +quapy.util.timeout(seconds)[source]

Opens a context that will raise an exception if not closed after a given number of seconds

              >>> def func(start_msg, end_msg):
               >>>     print(start_msg)
              @@ -2750,7 +2920,7 @@ this function is invoked, it loads the pickled resource. Example:

            -

            Module contents

            +

            Module contents

            QuaPy module for quantification

diff --git a/docs/build/html/quapy.method.html b/docs/build/html/quapy.method.html
index e843d2a..053fec3 100644
--- a/docs/build/html/quapy.method.html
+++ b/docs/build/html/quapy.method.html
@@ -1,23 +1,24 @@
-quapy.method package — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+quapy.method package — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
@@ -95,15 +96,15 @@
          -

          quapy.method package

          +

          quapy.method package

          -

          Submodules

          +

          Submodules

          -

          quapy.method.aggregative module

          +

          quapy.method.aggregative module

          -class quapy.method.aggregative.ACC(classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize')[source]
          +class quapy.method.aggregative.ACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', clipping: Literal['clip', 'none', 'project'] = 'clip', n_jobs=None)[source]

          Bases: AggregativeCrispQuantifier

Adjusted Classify & Count, the “adjusted” variant of CC, that corrects the predictions of CC
@@ -119,24 +120,49 @@ are to be generated in a k-fold cross-validation manner (with this for k); or as a collection defining the specific set of data to use for validation. Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

• method (str) –

  adjustment method to be used:

  • ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the \(P(C|Y)\) matrix.

  • ’invariant-ratio’: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.

• solver (str) –

  indicates the method to use for solving the system of linear equations. Valid options are ‘exact’, ‘minimize’, ‘exact-raise’, and ‘exact-cc’ (see quapy.functional.solve_adjustment).

• clipping (str) –

  the method to use for normalization.

  • If None or “none”, no normalization is performed.

  • If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1.

  • If “project”, the values are projected onto the probability simplex.

• n_jobs – number of parallel workers

-• solver – indicates the method to be used for obtaining the final estimates. The choice ‘exact’ comes down to solving the system of linear equations \(Ax=B\), where A is a matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in binary) and B is the vector of prevalence values estimated via CC, as \(x=A^{-1}B\). This solution might not exist for degenerated classifiers, in which case the method defaults to classify and count (i.e., does not attempt any adjustment). Another option is to search for the prevalence vector that minimizes the L2 norm of \(|Ax-B|\). The latter is achieved by indicating solver=’minimize’. This one generally works better, and is the default parameter. More details about this can be consulted in Bunse, M. “On Multi-Class Extensions of Adjusted Classify and Count”, in proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France).

+CLIPPING = ['clip', 'none', 'project', None]

+METHODS = ['inversion', 'invariant-ratio']

+SOLVERS = ['exact', 'minimize', 'exact-raise', 'exact-cc']
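A hedged construction sketch (logistic regression and the shown options are arbitrary choices; training and test_instances are assumed to be defined elsewhere):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import ACC
>>> acc = ACC(LogisticRegression(), val_split=5, solver='minimize', method='inversion', clipping='clip')
>>> acc.fit(training)
>>> prev_estim = acc.quantify(test_instances)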
        -aggregate(classif_predictions)[source]
        +aggregate(classif_predictions)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -150,62 +176,82 @@ Count”, on proceedings of the 2nd International Workshop on Learning to Quanti
        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

        Estimates the misclassification rates.

Parameters:

-classif_predictions – classifier predictions with true labels
-classmethod getPteCondEstim(classes, y, y_)[source]

-classmethod solve_adjustment(PteCondEstim, prevs_estim, solver='exact')[source]

-Solves the linear system \(Ax = B\) with \(A\) = PteCondEstim and \(B\) = prevs_estim

+classmethod getPteCondEstim(classes, y, y_)[source]

+Estimate the matrix with entry (i,j) being the estimate of \(P(\hat{y}_i|y_j)\), that is, the probability that a document that belongs to \(y_j\) ends up being classified as belonging to \(\hat{y}_i\)

Parameters:

-• PteCondEstim – a np.ndarray of shape (n_classes, n_classes) with entry (i,j) being the estimate of \(P(y_i|y_j)\), that is, the probability that an instance that belongs to \(y_j\) ends up being classified as belonging to \(y_i\)

-• prevs_estim – a np.ndarray of shape (n_classes,) with the class prevalence estimates

-• solver – indicates the method to use for solving the system of linear equations. Valid options are ‘exact’ (tries to solve the system – may fail if the misclassification matrix has rank < n_classes) or ‘optim_minimize’ (minimizes a norm – always exists).

+• classes – array-like with the class names

+• y – array-like with the true labels

+• y_ – array-like with the estimated labels

Returns:

-an adjusted np.ndarray of shape (n_classes,) with the corrected class prevalence estimates

+np.ndarray

        +
        +
        +classmethod newInvariantRatioEstimation(classifier: BaseEstimator, val_split=5, n_jobs=None)[source]
        +

        Constructs a quantifier that implements the Invariant Ratio Estimator of +Vaz et al. 2018 <https://jmlr.org/papers/v20/18-456.html>_. This amounts +to setting method to ‘invariant-ratio’ and clipping to ‘project’.

        +
        +
        Parameters:
        +
          +
        • classifier – a sklearn’s Estimator that generates a classifier

        • +
        • val_split – specifies the data used for generating classifier predictions. This specification

        • +
        +
        +
        +

        can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to +be extracted from the training set; or as an integer (default 5), indicating that the predictions +are to be generated in a k-fold cross-validation manner (with this integer indicating the value +for k); or as a collection defining the specific set of data to use for validation. +Alternatively, this set can be specified at fit time by indicating the exact set of data +on which the predictions are to be generated. +:param n_jobs: number of parallel workers +:return: an instance of ACC configured so that it implements the Invariant Ratio Estimator

        +
        +
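A minimal usage sketch for this constructor (training, a LabelledCollection, and X_test, a matrix of test instances, are placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import ACC
>>> quantifier = ACC.newInvariantRatioEstimation(LogisticRegression()).fit(training)
>>> prevalence = quantifier.quantify(X_test)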
        -quapy.method.aggregative.AdjustedClassifyAndCount
        +quapy.method.aggregative.AdjustedClassifyAndCount

        alias of ACC

        -class quapy.method.aggregative.AggregativeCrispQuantifier[source]
        +class quapy.method.aggregative.AggregativeCrispQuantifier[source]

        Bases: AggregativeQuantifier, ABC

        -

        Abstract class for quantification methods that base their estimations on the aggregation of crips decisions +

        Abstract class for quantification methods that base their estimations on the aggregation of crisp decisions as returned by a hard classifier. Aggregative crisp quantifiers thus extend Aggregative Quantifiers by implementing specifications about crisp predictions.

        -class quapy.method.aggregative.AggregativeMedianEstimator(base_quantifier: AggregativeQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]
        +class quapy.method.aggregative.AggregativeMedianEstimator(base_quantifier: AggregativeQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]

        Bases: BinaryQuantifier

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimates returned by differently (hyper)parameterized base quantifiers. @@ -223,7 +269,7 @@ i.e., in cases of binary quantification.

        -fit(training: LabelledCollection, **kwargs)[source]
        +fit(training: LabelledCollection, **kwargs)[source]

        Trains a quantifier.

        Parameters:
        @@ -237,7 +283,7 @@ i.e., in cases of binary quantification.

        -get_params(deep=True)[source]
        +get_params(deep=True)[source]

        Get parameters for this estimator.

        Parameters:
        @@ -255,7 +301,7 @@ contained subobjects that are estimators.

        -quantify(instances)[source]
        +quantify(instances)[source]

        Generate class prevalence estimates for the sample’s instances

        Parameters:
        @@ -269,7 +315,7 @@ contained subobjects that are estimators.

        -set_params(**params)[source]
        +set_params(**params)[source]

        Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have @@ -292,7 +338,7 @@ possible to update each component of a nested object.

        -class quapy.method.aggregative.AggregativeQuantifier[source]
        +class quapy.method.aggregative.AggregativeQuantifier[source]

        Bases: BaseQuantifier, ABC

        Abstract class for quantification methods that base their estimations on the aggregation of classification results. Aggregative quantifiers implement a pipeline that consists of generating classification predictions @@ -306,7 +352,7 @@ and aggregate().

        -abstract aggregate(classif_predictions: ndarray)[source]
        +abstract aggregate(classif_predictions: ndarray)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -320,13 +366,13 @@ and
        -abstract aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        +abstract aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

        Trains the aggregation function.

        Parameters:
          -
        • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

        • +
        • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

        • data – a quapy.data.base.LabelledCollection consisting of the training data

        @@ -335,7 +381,7 @@ by the classifier

      • -property classes_
        +property classes_

        Class labels, in the same order in which class prevalence values are to be computed. This default implementation actually returns the class labels of the learner.

        @@ -347,7 +393,7 @@ This default implementation actually returns the class labels of the learner.

        -property classifier
        +property classifier

        Gives access to the classifier

        Returns:
        @@ -358,7 +404,7 @@ This default implementation actually returns the class labels of the learner.

        -classifier_fit_predict(data: LabelledCollection, fit_classifier=True, predict_on=None)[source]
        +classifier_fit_predict(data: LabelledCollection, fit_classifier=True, predict_on=None)[source]

        Trains the classifier if requested (fit_classifier=True) and generate the necessary predictions to train the aggregation function.

        @@ -380,7 +426,7 @@ the predictions.

        -classify(instances)[source]
        +classify(instances)[source]

        Provides the label predictions for the given instances. The predictions should respect the format expected by aggregate(), e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for non-probabilistic quantifiers. The default one is “decision_function”.

        @@ -396,7 +442,7 @@ non-probabilistic quantifiers. The default one is “decision_function”.

        -fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]
        +fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]

        Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.

        Parameters:
        @@ -414,7 +460,7 @@ learner has been trained outside the quantifier.

        -quantify(instances)[source]
        +quantify(instances)[source]

        Generate class prevalence estimates for the sample’s instances by aggregating the label predictions generated by the classifier.

        @@ -429,19 +475,19 @@ by the classifier.

        -property val_split
        +property val_split
        -val_split_ = None
        +val_split_ = None
        -class quapy.method.aggregative.AggregativeSoftQuantifier[source]
        +class quapy.method.aggregative.AggregativeSoftQuantifier[source]

        Bases: AggregativeQuantifier, ABC

        Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities as returned by a probabilistic classifier. @@ -449,13 +495,83 @@ Aggregative soft quantifiers thus extend Aggregative Quantifiers by implementing about soft predictions.

        +
        +
        +class quapy.method.aggregative.BayesianCC(classifier: BaseEstimator, val_split: float = 0.75, num_warmup: int = 500, num_samples: int = 1000, mcmc_seed: int = 0)[source]
        +

        Bases: AggregativeCrispQuantifier

        +

        Bayesian quantification method, +which is a variant of ACC that calculates the posterior probability distribution +over the prevalence vectors, rather than providing a point estimate obtained +by matrix inversion.

        +

Can be used to diagnose degeneracy in the predictions, visible when the confusion +matrix has a high condition number, or to quantify uncertainty around the point estimate.

        +

        This method relies on extra dependencies, which have to be installed via: +$ pip install quapy[bayes]

        +
        +
        Parameters:
        +
          +
        • classifier – a sklearn’s Estimator that generates a classifier

        • +
        • val_split – a float in (0, 1) indicating the proportion of the training data to be used, +as a stratified held-out validation set, for generating classifier predictions.

        • +
        • num_warmup – number of warmup iterations for the MCMC sampler (default 500)

        • +
        • num_samples – number of samples to draw from the posterior (default 1000)

        • +
        • mcmc_seed – random seed for the MCMC sampler (default 0)

        • +
        +
        +
        +
        +
        +aggregate(classif_predictions)[source]
        +

        Implements the aggregation of label predictions.

        +
        +
        Parameters:
        +

classif_predictions – np.ndarray of label predictions

        +
        +
        Returns:
        +

        np.ndarray of shape (n_classes,) with class prevalence estimates.

        +
        +
        +
        + +
        +
        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        +

        Estimates the misclassification rates.

        +
        +
        Parameters:
        +
        +
        +
        +
        + +
        +
        +get_conditional_probability_samples()[source]
        +
        + +
        +
        +get_prevalence_samples()[source]
        +
        + +
        +
        +sample_from_posterior(classif_predictions)[source]
        +
        + +
        +
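A minimal usage sketch of BayesianCC (training and X_test are placeholders for a LabelledCollection and a matrix of test instances):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import BayesianCC
>>> quantifier = BayesianCC(classifier=LogisticRegression(), num_samples=1000).fit(training)
>>> prevalence = quantifier.quantify(X_test)          # class prevalence estimates, shape (n_classes,)
>>> samples = quantifier.get_prevalence_samples()     # posterior draws of the prevalence vector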
        -class quapy.method.aggregative.BinaryAggregativeQuantifier[source]
        +class quapy.method.aggregative.BinaryAggregativeQuantifier[source]

        Bases: AggregativeQuantifier, BinaryQuantifier

        -fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]
        +fit(data: LabelledCollection, fit_classifier=True, val_split=None)[source]

        Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.

        Parameters:
        @@ -473,19 +589,19 @@ learner has been trained outside the quantifier.

        -property neg_label
        +property neg_label
        -property pos_label
        +property pos_label
        -class quapy.method.aggregative.CC(classifier: BaseEstimator)[source]
        +class quapy.method.aggregative.CC(classifier: BaseEstimator)[source]

        Bases: AggregativeCrispQuantifier

        The most basic Quantification method. One that simply classifies all instances and counts how many have been attributed to each of the classes in order to compute class prevalence estimates.
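A minimal usage sketch (training and X_test are placeholders for a LabelledCollection and a matrix of test instances):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import CC
>>> prevalence = CC(LogisticRegression()).fit(training).quantify(X_test)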

        @@ -496,7 +612,7 @@ attributed to each of the classes in order to compute class prevalence estimates
        -aggregate(classif_predictions: ndarray)[source]
        +aggregate(classif_predictions: ndarray)[source]

        Computes class prevalence estimates by counting the prevalence of each of the predicted labels.

        Parameters:
        @@ -510,11 +626,14 @@ attributed to each of the classes in order to compute class prevalence estimates
        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

        Nothing to do here!

        Parameters:
        -

        classif_predictions – this is actually None

        +
          +
        • classif_predictions – not used

        • +
        • data – not used

        • +
        @@ -523,13 +642,13 @@ attributed to each of the classes in order to compute class prevalence estimates
        -quapy.method.aggregative.ClassifyAndCount
        +quapy.method.aggregative.ClassifyAndCount

        alias of CC

        -class quapy.method.aggregative.DMy(classifier, val_split=5, nbins=8, divergence: str | Callable = 'HD', cdf=False, search='optim_minimize', n_jobs=None)[source]
        +class quapy.method.aggregative.DMy(classifier, val_split=5, nbins=8, divergence: Union[str, Callable] = 'HD', cdf=False, search='optim_minimize', n_jobs=None)[source]

        Bases: AggregativeSoftQuantifier

        Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of posterior probabilities. This implementation takes the number of bins, the divergence, and the possibility to work on CDF @@ -555,7 +674,7 @@ Distance)
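A minimal usage sketch with the default hyperparameters made explicit (training and X_test are placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import DMy
>>> dm = DMy(LogisticRegression(), nbins=8, divergence='HD').fit(training)
>>> prevalence = dm.quantify(X_test)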

        -aggregate(posteriors: ndarray)[source]
        +aggregate(posteriors: ndarray)[source]

        Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. In the multiclass case, with n the number of classes, the test and mixture distributions contain @@ -573,8 +692,9 @@ independently. The matching is computed as an average of the divergence across a

        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        -

        Trains the classifier (if requested) and generates the validation distributions out of the training data. +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source] +

        Trains the aggregation function of a distribution matching method. This comes down to generating the +validation distributions out of the training data. The validation distributions have shape (n, ch, nbins), with n the number of classes, ch the number of channels, and nbins the number of bins. In particular, let V be the validation distributions; then di=V[i] are the distributions obtained from training data labelled with class i; while dij = di[j] is the discrete @@ -583,12 +703,9 @@ is the fraction of instances with a value in the k-th bin.

        Parameters:
          -
        • data – the training set

        • -
        • fit_classifier – set to False to bypass the training (the learner is assumed to be already fit)

        • -
        • val_split – either a float in (0,1) indicating the proportion of training instances to use for -validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection -indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV -to estimate the parameters

        • +
        • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the posterior probabilities issued by the classifier and, as labels, the true labels

        • +
        • data – a quapy.data.base.LabelledCollection consisting of the training data

        @@ -598,13 +715,13 @@ to estimate the parameters

        -quapy.method.aggregative.DistributionMatchingY
        +quapy.method.aggregative.DistributionMatchingY

        alias of DMy

        -class quapy.method.aggregative.DyS(classifier: BaseEstimator, val_split=5, n_bins=8, divergence: str | Callable = 'HD', tol=1e-05, n_jobs=None)[source]
        +class quapy.method.aggregative.DyS(classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable] = 'HD', tol=1e-05, n_jobs=None)[source]

        Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

        DyS framework (DyS). DyS is a generalization of HDy method, using a Ternary Search in order to find the prevalence that @@ -626,7 +743,7 @@ callable function computes the divergence between two distributions (two equally
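A minimal usage sketch; DyS is a binary quantifier, so binary_training stands for a two-class LabelledCollection and X_test for the test instances:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import DyS
>>> dys = DyS(LogisticRegression(), n_bins=8, divergence='HD').fit(binary_training)
>>> prevalence = dys.quantify(X_test)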

        -aggregate(classif_posteriors)[source]
        +aggregate(classif_posteriors)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -640,13 +757,13 @@ callable function computes the divergence between two distributions (two equally
        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        -

        Trains the aggregation function.

        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source] +

        Trains the aggregation function of DyS.

        Parameters:
          -
        • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

        • +
        • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the posterior probabilities issued by the classifier and, as labels, the true labels

        • data – a quapy.data.base.LabelledCollection consisting of the training data

        @@ -657,7 +774,7 @@ by the classifier

        -class quapy.method.aggregative.EMQ(classifier: BaseEstimator, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None)[source]
        +class quapy.method.aggregative.EMQ(classifier: BaseEstimator, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None)[source]

        Bases: AggregativeSoftQuantifier

        Expectation Maximization for Quantification (EMQ), aka Saerens-Latinne-Decaestecker (SLD) algorithm. @@ -694,7 +811,7 @@ an integer k –the number of folds.
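A minimal usage sketch (training and X_test are placeholders); the EMQ_BCTS class method documented below builds the recalibrated configuration of Alexandari et al.:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import EMQ
>>> sld = EMQ(LogisticRegression()).fit(training)
>>> prevalence = sld.quantify(X_test)
>>> sld_bcts = EMQ.EMQ_BCTS(LogisticRegression())     # BCTS-recalibrated variant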

        -classmethod EM(tr_prev, posterior_probabilities, epsilon=0.0001)[source]
        +classmethod EM(tr_prev, posterior_probabilities, epsilon=0.0001)[source]

        Computes the Expectation Maximization routine.

        Parameters:
        @@ -715,7 +832,7 @@ the corrected posterior probabilities (shape (n_instances, n_classes,)
        -classmethod EMQ_BCTS(classifier: BaseEstimator, n_jobs=None)[source]
        +classmethod EMQ_BCTS(classifier: BaseEstimator, n_jobs=None)[source]

        Constructs an instance of EMQ using the best configuration found in the Alexandari et al. paper, i.e., one that relies on Bias-Corrected Temperature Scaling (BCTS) as a recalibration function, and that uses an estimate of the training prevalence instead of the true training prevalence.

        @@ -734,17 +851,17 @@ the true training prevalence.

        -EPSILON = 0.0001
        +EPSILON = 0.0001
        -MAX_ITER = 1000
        +MAX_ITER = 1000
        -aggregate(classif_posteriors, epsilon=0.0001)[source]
        +aggregate(classif_posteriors, epsilon=0.0001)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -758,13 +875,14 @@ the true training prevalence.

        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        -

        Trains the aggregation function.

        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source] +

Trains the aggregation function of EMQ. This comes down to recalibrating the posterior probabilities +if requested.

        Parameters:
          -
        • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

        • +
        • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the posterior probabilities issued by the classifier and, as labels, the true labels

        • data – a quapy.data.base.LabelledCollection consisting of the training data

        @@ -773,7 +891,7 @@ by the classifier

        -classify(instances)[source]
        +classify(instances)[source]

        Provides the posterior probabilities for the given instances. If the classifier was required to be recalibrated, then these posteriors are recalibrated accordingly.

        @@ -788,7 +906,7 @@ to be recalibrated, then these posteriors are recalibrated accordingly.

        -predict_proba(instances, epsilon=0.0001)[source]
        +predict_proba(instances, epsilon=0.0001)[source]

        Returns the posterior probabilities updated by the EM algorithm.

        Parameters:
        @@ -807,13 +925,13 @@ to be recalibrated, then these posteriors are recalibrated accordingly.

        -quapy.method.aggregative.ExpectationMaximizationQuantifier
        +quapy.method.aggregative.ExpectationMaximizationQuantifier

        alias of EMQ

        -class quapy.method.aggregative.HDy(classifier: BaseEstimator, val_split=5)[source]
        +class quapy.method.aggregative.HDy(classifier: BaseEstimator, val_split=5)[source]

        Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

        Hellinger Distance y (HDy). HDy is a probabilistic method for training binary quantifiers, that models quantification as the problem of @@ -833,7 +951,7 @@ validation distribution, or a
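A minimal usage sketch; HDy is a binary quantifier, so binary_training stands for a two-class LabelledCollection:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import HDy
>>> hdy = HDy(LogisticRegression(), val_split=5).fit(binary_training)
>>> prevalence = hdy.quantify(X_test)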

        -aggregate(classif_posteriors)[source]
        +aggregate(classif_posteriors)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -847,21 +965,16 @@ validation distribution, or a
        -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
        -

        Trains a HDy quantifier.

        +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source] +

        Trains the aggregation function of HDy.

        Parameters:
          -
        • data – the training set

        • -
        • fit_classifier – set to False to bypass the training (the learner is assumed to be already fit)

        • -
        • val_split – either a float in (0,1) indicating the proportion of training instances to use for -validation (e.g., 0.3 for using 30% of the training set as validation data), or a -quapy.data.base.LabelledCollection indicating the validation set itself

        • +
        • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the posterior probabilities issued by the classifier and, as labels, the true labels

        • +
        • data – a quapy.data.base.LabelledCollection consisting of the training data

        -
        Returns:
        -

        self

        -
        @@ -869,13 +982,13 @@ validation (e.g., 0.3 for using 30% of the training set as validation data), or
        -quapy.method.aggregative.HellingerDistanceY
        +quapy.method.aggregative.HellingerDistanceY

        alias of HDy

        -class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='multiprocessing')[source]
        +class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='multiprocessing')[source]

        Bases: OneVsAllGeneric, AggregativeQuantifier

        Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the @@ -896,7 +1009,7 @@ is removed and no longer available at predict time.

        -aggregate(classif_predictions)[source]
        +aggregate(classif_predictions)[source]

        Implements the aggregation of label predictions.

        Parameters:
        @@ -910,7 +1023,7 @@ is removed and no longer available at predict time.

        -classify(instances)[source]
        +classify(instances)[source]

If the base quantifier is not probabilistic, returns a matrix of shape (n,m,) with n the number of instances and m the number of classes. The entry (i,j) is a binary value indicating whether instance i belongs to class j. The binary classifications are independent of each other, meaning that an instance @@ -933,7 +1046,7 @@ probabilities are independent of each other, meaning that, in general, they do n

        -class quapy.method.aggregative.PACC(classifier: BaseEstimator, val_split=5, n_jobs=None, solver='minimize')[source]
        +class quapy.method.aggregative.PACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', clipping: Literal['clip', 'none', 'project'] = 'clip', n_jobs=None)[source]

        Bases: AggregativeSoftQuantifier

        Probabilistic Adjusted Classify & Count, the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.

        @@ -947,25 +1060,34 @@ be extracted from the training set; or as an integer (default 5), indicating tha are to be generated in a k-fold cross-validation manner (with this integer indicating the value for k). Alternatively, this set can be specified at fit time by indicating the exact set of data on which the predictions are to be generated.

        -
      • n_jobs – number of parallel workers

      • -
      • solver

        indicates the method to be used for obtaining the final estimates. The choice -‘exact’ comes down to solving the system of linear equations \(Ax=B\) where A is a -matrix containing the class-conditional probabilities of the predictions (e.g., the tpr and fpr in -binary) and B is the vector of prevalence values estimated via CC, as \(x=A^{-1}B\). This solution -might not exist for degenerated classifiers, in which case the method defaults to classify and count -(i.e., does not attempt any adjustment). -Another option is to search for the prevalence vector that minimizes the L2 norm of \(|Ax-B|\). The latter -is achieved by indicating solver=’minimize’. This one generally works better, and is the default parameter. -More details about this can be consulted in Bunse, M. “On Multi-Class Extensions of Adjusted Classify and -Count”, on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications -(LQ 2022), ECML/PKDD 2022, Grenoble (France).

        +
      • method (str) –

        adjustment method to be used:

        +
          +
• ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the P(C|Y) matrix.

        • +
        • ’invariant-ratio’: invariant ratio estimator of Vaz et al., which replaces the last equation with the normalization condition.

        • +

      • +
      • solver (str) –

        the method to use for solving the system of linear equations. Valid options are:

+
  +
• ‘minimize’: minimizes the L2 norm of \(|Ax-B|\); a solution always exists (this is the default).

• ‘exact-raise’: solves the system \(Ax=B\) exactly, raising an error if the matrix is not invertible.

• ‘exact-cc’: solves the system exactly, falling back to the CC prevalence estimate if the matrix is not invertible.

• ‘exact’: deprecated; equivalent to ‘exact-cc’.

      • +
      • clipping (str) –

        the method to use for normalization.

        +
          +
        • If None or “none”, no normalization is performed.

        • +
        • If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1.

        • +
        • If “project”, the values are projected onto the probability simplex.

        • +
        +

      • +
      • n_jobs – number of parallel workers
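A minimal usage sketch spelling out the defaults of the parameters above (training and X_test are placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.aggregative import PACC
>>> pacc = PACC(LogisticRegression(), solver='minimize', method='inversion', clipping='clip')
>>> prevalence = pacc.fit(training).quantify(X_test)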

      -aggregate(classif_posteriors)[source]
      +aggregate(classif_posteriors)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -979,25 +1101,29 @@ Count”, on proceedings of the 2nd International Workshop on Learning to Quanti
      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Estimates the misclassification rates

      Parameters:
      -

      classif_predictions – classifier soft predictions with true labels

      +
      -classmethod getPteCondEstim(classes, y, y_)[source]
      +classmethod getPteCondEstim(classes, y, y_)[source]
      -class quapy.method.aggregative.PCC(classifier: BaseEstimator)[source]
      +class quapy.method.aggregative.PCC(classifier: BaseEstimator)[source]

      Bases: AggregativeSoftQuantifier

      Probabilistic Classify & Count, the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier.

      @@ -1008,7 +1134,7 @@ the probabilistic variant of CC that relies on the posterior probabilities retur
      -aggregate(classif_posteriors)[source]
      +aggregate(classif_posteriors)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1022,11 +1148,14 @@ the probabilistic variant of CC that relies on the posterior probabilities retur
      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Nothing to do here!

      Parameters:
      -

      classif_predictions – this is actually None

      +
        +
      • classif_predictions – not used

      • +
      • data – not used

      • +
      @@ -1035,25 +1164,25 @@ the probabilistic variant of CC that relies on the posterior probabilities retur
      -quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount
      +quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount

      alias of PACC

      -quapy.method.aggregative.ProbabilisticClassifyAndCount
      +quapy.method.aggregative.ProbabilisticClassifyAndCount

      alias of PCC

      -quapy.method.aggregative.SLD
      +quapy.method.aggregative.SLD

      alias of EMQ

      -class quapy.method.aggregative.SMM(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method.aggregative.SMM(classifier: BaseEstimator, val_split=5)[source]

      Bases: AggregativeSoftQuantifier, BinaryAggregativeQuantifier

      SMM method (SMM). SMM is a simplification of matching distribution methods where the representation of the examples @@ -1069,7 +1198,7 @@ validation distribution, or a

      -aggregate(classif_posteriors)[source]
      +aggregate(classif_posteriors)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1083,13 +1212,13 @@ validation distribution, or a
      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      -

      Trains the aggregation function.

      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source] +

      Trains the aggregation function of SMM.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the posterior probabilities issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1100,7 +1229,7 @@ by the classifier

      -quapy.method.aggregative.newELM(svmperf_base=None, loss='01', C=1)[source]
      +quapy.method.aggregative.newELM(svmperf_base=None, loss='01', C=1)[source]

      Explicit Loss Minimization (ELM) quantifiers. Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss @@ -1130,7 +1259,7 @@ underlying classifier

      -quapy.method.aggregative.newSVMAE(svmperf_base=None, C=1)[source]
      +quapy.method.aggregative.newSVMAE(svmperf_base=None, C=1)[source]

SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by Moreo and Sebastiani, 2021. Equivalent to:

      @@ -1161,7 +1290,7 @@ underlying classifier

      -quapy.method.aggregative.newSVMKLD(svmperf_base=None, C=1)[source]
      +quapy.method.aggregative.newSVMKLD(svmperf_base=None, C=1)[source]

      SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence normalized via the logistic function, as proposed by Esuli et al. 2015. @@ -1193,7 +1322,7 @@ underlying classifier

      -quapy.method.aggregative.newSVMQ(svmperf_base=None, C=1)[source]
      +quapy.method.aggregative.newSVMQ(svmperf_base=None, C=1)[source]

      SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Q loss combining a classification-oriented loss and a quantification-oriented loss, as proposed by Barranquero et al. 2015. @@ -1225,7 +1354,7 @@ underlying classifier

      -quapy.method.aggregative.newSVMRAE(svmperf_base=None, C=1)[source]
      +quapy.method.aggregative.newSVMRAE(svmperf_base=None, C=1)[source]

SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first used by Moreo and Sebastiani, 2021. Equivalent to:

      @@ -1254,19 +1383,19 @@ underlying classifier

      -
      +
      -class quapy.method._kdey.KDEBase[source]
      +class quapy.method._kdey.KDEBase[source]

      Bases: object

      Common ancestor for KDE-based methods. Implements some common routines.

      -BANDWIDTH_METHOD = ['scott', 'silverman']
      +BANDWIDTH_METHOD = ['scott', 'silverman']
      -get_kde_function(X, bandwidth)[source]
      +get_kde_function(X, bandwidth)[source]

      Wraps the KDE function from scikit-learn.

      Parameters:
      @@ -1283,7 +1412,7 @@ underlying classifier

      -get_mixture_components(X, y, n_classes, bandwidth)[source]
      +get_mixture_components(X, y, classes, bandwidth)[source]

      Returns an array containing the mixture components, i.e., the KDE functions for each class.

      Parameters:
      @@ -1302,7 +1431,7 @@ underlying classifier

      -pdf(kde, X)[source]
      +pdf(kde, X)[source]

Wraps the density evaluation of scikit-learn’s KDE. Scikit-learn returns log-scores (s), so this function returns \(e^{s}\)

      @@ -1322,7 +1451,7 @@ function returns \(e^{s}\)

      -class quapy.method._kdey.KDEyCS(classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None)[source]
      +class quapy.method._kdey.KDEyCS(classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None)[source]

      Bases: AggregativeSoftQuantifier

      Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as the divergence measure to be minimized. This method was first proposed in the paper @@ -1356,7 +1485,7 @@ on which the predictions are to be generated.

      -aggregate(posteriors: ndarray)[source]
      +aggregate(posteriors: ndarray)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1370,13 +1499,13 @@ on which the predictions are to be generated.

      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Trains the aggregation function.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1385,14 +1514,14 @@ by the classifier

      -gram_matrix_mix_sum(X, Y=None)[source]
      +gram_matrix_mix_sum(X, Y=None)[source]
      -class quapy.method._kdey.KDEyHD(classifier: BaseEstimator, val_split=10, divergence: str = 'HD', bandwidth=0.1, n_jobs=None, random_state=None, montecarlo_trials=10000)[source]
      +class quapy.method._kdey.KDEyHD(classifier: BaseEstimator, val_split=10, divergence: str = 'HD', bandwidth=0.1, n_jobs=None, random_state=None, montecarlo_trials=10000)[source]

      Bases: AggregativeSoftQuantifier, KDEBase

Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Distance (HD) as the divergence measure to be minimized. This method was first proposed in the paper @@ -1432,7 +1561,7 @@ on which the predictions are to be generated.

      -aggregate(posteriors: ndarray)[source]
      +aggregate(posteriors: ndarray)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1446,13 +1575,13 @@ on which the predictions are to be generated.

      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Trains the aggregation function.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1463,7 +1592,7 @@ by the classifier

      -class quapy.method._kdey.KDEyML(classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=None)[source]
      +class quapy.method._kdey.KDEyML(classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=None)[source]

      Bases: AggregativeSoftQuantifier, KDEBase

      Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as the divergence measure to be minimized. This method was first proposed in the paper @@ -1500,7 +1629,7 @@ on which the predictions are to be generated.
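A minimal usage sketch (training and X_test are placeholders); KDEyCS and KDEyHD, documented above, are constructed analogously:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method._kdey import KDEyML
>>> kdey = KDEyML(LogisticRegression(), bandwidth=0.1).fit(training)
>>> prevalence = kdey.quantify(X_test)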

      -aggregate(posteriors: ndarray)[source]
      +aggregate(posteriors: ndarray)[source]

      Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood of the data (i.e., that minimizes the negative log-likelihood)

      @@ -1515,13 +1644,13 @@ of the data (i.e., that minimizes the negative log-likelihood)

      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Trains the aggregation function.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1530,9 +1659,9 @@ by the classifier

      -
      +
      -class quapy.method._neural.QuaNetModule(doc_embedding_size, n_classes, stats_size, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, order_by=0)[source]
      +class quapy.method._neural.QuaNetModule(doc_embedding_size, n_classes, stats_size, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, order_by=0)[source]

      Bases: Module

      Implements the QuaNet forward pass. See QuaNetTrainer for training QuaNet.

      @@ -1554,12 +1683,12 @@ quantification embedding

      -property device
      +property device
      -forward(doc_embeddings, doc_posteriors, statistics)[source]
      +forward(doc_embeddings, doc_posteriors, statistics)[source]

      Defines the computation performed at every call.

      Should be overridden by all subclasses.

      @@ -1571,11 +1700,16 @@ registered hooks while the latter silently ignores them.

      +
      +
      +training: bool
      +
      +
      -class quapy.method._neural.QuaNetTrainer(classifier, sample_size=None, n_epochs=100, tr_iter_per_poch=500, va_iter_per_poch=100, lr=0.001, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, patience=10, checkpointdir='../checkpoint', checkpointname=None, device='cuda')[source]
      +class quapy.method._neural.QuaNetTrainer(classifier, sample_size=None, n_epochs=100, tr_iter_per_poch=500, va_iter_per_poch=100, lr=0.001, lstm_hidden_size=64, lstm_nlayers=1, ff_layers=[1024, 512], bidirectional=True, qdrop_p=0.5, patience=10, checkpointdir='../checkpoint', checkpointname=None, device='cuda')[source]

      Bases: BaseQuantifier

      Implementation of QuaNet, a neural network for quantification. This implementation uses PyTorch and can take advantage of GPU @@ -1630,24 +1764,24 @@ training phase (early stopping)

      -property classes_
      +property classes_
      -clean_checkpoint()[source]
      +clean_checkpoint()[source]

      Removes the checkpoint

      -clean_checkpoint_dir()[source]
      +clean_checkpoint_dir()[source]

      Removes anything contained in the checkpoint directory

      -fit(data: LabelledCollection, fit_classifier=True)[source]
      +fit(data: LabelledCollection, fit_classifier=True)[source]

      Trains QuaNet.

      Parameters:
      @@ -1666,7 +1800,7 @@ training phase (early stopping)

      -get_params(deep=True)[source]
      +get_params(deep=True)[source]

      Get parameters for this estimator.

      Parameters:
      @@ -1684,7 +1818,7 @@ contained subobjects that are estimators.

      -quantify(instances)[source]
      +quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -1698,7 +1832,7 @@ contained subobjects that are estimators.

      -set_params(**parameters)[source]
      +set_params(**parameters)[source]

      Set the parameters of this estimator.

      The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have @@ -1721,7 +1855,7 @@ possible to update each component of a nested object.

      -quapy.method._neural.mae_loss(output, target)[source]
      +quapy.method._neural.mae_loss(output, target)[source]

      Torch-like wrapper for the Mean Absolute Error

      Parameters:
      @@ -1736,9 +1870,9 @@ possible to update each component of a nested object.

      -
      +
      -class quapy.method._threshold_optim.MAX(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method._threshold_optim.MAX(classifier: BaseEstimator, val_split=5)[source]

      Bases: ThresholdOptimization

      Threshold Optimization variant for ACC as proposed by Forman 2006 and @@ -1760,7 +1894,7 @@ validation data, or as an integer, indicating that the misclassification rates s

      -condition(tpr, fpr) float[source]
      +condition(tpr, fpr) float[source]

      Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

      @@ -1780,7 +1914,7 @@ This function should return the (float) score to be minimized.

      -class quapy.method._threshold_optim.MS(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method._threshold_optim.MS(classifier: BaseEstimator, val_split=5)[source]

      Bases: ThresholdOptimization

      Median Sweep. Threshold Optimization variant for ACC as proposed by Forman 2006 and @@ -1802,7 +1936,7 @@ validation data, or as an integer, indicating that the misclassification rates s
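A minimal usage sketch; like all threshold-optimization variants, MS is binary, so binary_training stands for a two-class LabelledCollection:

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method._threshold_optim import MS
>>> ms = MS(LogisticRegression(), val_split=5).fit(binary_training)
>>> prevalence = ms.quantify(X_test)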

      -aggregate(classif_predictions: ndarray)[source]
      +aggregate(classif_predictions: ndarray)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1816,13 +1950,13 @@ validation data, or as an integer, indicating that the misclassification rates s
      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Trains the aggregation function.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1831,7 +1965,7 @@ by the classifier

      -condition(tpr, fpr) float[source]
      +condition(tpr, fpr) float[source]

      Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

      @@ -1851,7 +1985,7 @@ This function should return the (float) score to be minimized.

      -class quapy.method._threshold_optim.MS2(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method._threshold_optim.MS2(classifier: BaseEstimator, val_split=5)[source]

      Bases: MS

      Median Sweep 2. Threshold Optimization variant for ACC as proposed by Forman 2006 and @@ -1874,7 +2008,7 @@ validation data, or as an integer, indicating that the misclassification rates s

      -discard(tpr, fpr) bool[source]
      +discard(tpr, fpr) bool[source]

      Indicates whether a combination of tpr and fpr should be discarded

      Parameters:
      @@ -1893,7 +2027,7 @@ validation data, or as an integer, indicating that the misclassification rates s
      -class quapy.method._threshold_optim.T50(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method._threshold_optim.T50(classifier: BaseEstimator, val_split=5)[source]

      Bases: ThresholdOptimization

      Threshold Optimization variant for ACC as proposed by Forman 2006 and @@ -1915,7 +2049,7 @@ validation data, or as an integer, indicating that the misclassification rates s

      -condition(tpr, fpr) float[source]
      +condition(tpr, fpr) float[source]

      Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

      @@ -1935,7 +2069,7 @@ This function should return the (float) score to be minimized.

      -class quapy.method._threshold_optim.ThresholdOptimization(classifier: BaseEstimator, val_split=5, n_jobs=None)[source]
      +class quapy.method._threshold_optim.ThresholdOptimization(classifier: BaseEstimator, val_split=5, n_jobs=None)[source]

      Bases: BinaryAggregativeQuantifier

      Abstract class of Threshold Optimization variants for ACC as proposed by Forman 2006 and @@ -1959,7 +2093,7 @@ validation data, or as an integer, indicating that the misclassification rates s

      -aggregate(classif_predictions: ndarray)[source]
      +aggregate(classif_predictions: ndarray)[source]

      Implements the aggregation of label predictions.

      Parameters:
      @@ -1973,18 +2107,18 @@ validation data, or as an integer, indicating that the misclassification rates s
      -aggregate_with_threshold(classif_predictions, tprs, fprs, thresholds)[source]
      +aggregate_with_threshold(classif_predictions, tprs, fprs, thresholds)[source]
      -aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]
      +aggregation_fit(classif_predictions: LabelledCollection, data: LabelledCollection)[source]

      Trains the aggregation function.

      Parameters:
        -
      • classif_predictions – a LabelledCollection containing the label predictions issued -by the classifier

      • +
      • classif_predictions – a quapy.data.base.LabelledCollection containing, +as instances, the predictions issued by the classifier and, as labels, the true labels

      • data – a quapy.data.base.LabelledCollection consisting of the training data

      @@ -1993,7 +2127,7 @@ by the classifier

      -abstract condition(tpr, fpr) float[source]
      +abstract condition(tpr, fpr) float[source]

      Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

      @@ -2011,7 +2145,7 @@ This function should return the (float) score to be minimized.

      -discard(tpr, fpr) bool[source]
      +discard(tpr, fpr) bool[source]

      Indicates whether a combination of tpr and fpr should be discarded

      Parameters:
      @@ -2030,7 +2164,7 @@ This function should return the (float) score to be minimized.

      -class quapy.method._threshold_optim.X(classifier: BaseEstimator, val_split=5)[source]
      +class quapy.method._threshold_optim.X(classifier: BaseEstimator, val_split=5)[source]

      Bases: ThresholdOptimization

      Threshold Optimization variant for ACC as proposed by Forman 2006 and @@ -2052,7 +2186,7 @@ validation data, or as an integer, indicating that the misclassification rates s

      -condition(tpr, fpr) float[source]
      +condition(tpr, fpr) float[source]

      Implements the criterion according to which the threshold should be selected. This function should return the (float) score to be minimized.

      @@ -2072,17 +2206,17 @@ This function should return the (float) score to be minimized.

      -

      quapy.method.base module

      +

      quapy.method.base module

      -class quapy.method.base.BaseQuantifier[source]
      +class quapy.method.base.BaseQuantifier[source]

      Bases: BaseEstimator

      Abstract Quantifier. A quantifier is defined as an object of a class that implements the method fit() on quapy.data.base.LabelledCollection, the method quantify(), and the set_params() and get_params() for model selection (see quapy.model_selection.GridSearchQ())

      -abstract fit(data: LabelledCollection)[source]
      +abstract fit(data: LabelledCollection)[source]

      Trains a quantifier.

      Parameters:
      @@ -2096,7 +2230,7 @@ This function should return the (float) score to be minimized.

      -abstract quantify(instances)[source]
      +abstract quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -2112,7 +2246,7 @@ This function should return the (float) score to be minimized.

      -class quapy.method.base.BinaryQuantifier[source]
      +class quapy.method.base.BinaryQuantifier[source]

      Bases: BaseQuantifier

      Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes (typically, to be interpreted as one class and its complement).

      @@ -2120,24 +2254,24 @@ This function should return the (float) score to be minimized.

      -class quapy.method.base.OneVsAll[source]
      +class quapy.method.base.OneVsAll[source]

      Bases: object

      -class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None)[source]
      +class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None)[source]

      Bases: OneVsAll, BaseQuantifier

Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.
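A minimal usage sketch that turns a binary quantifier into a multiclass one (training and X_test are placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.base import OneVsAllGeneric
>>> from quapy.method.aggregative import HDy
>>> ova = OneVsAllGeneric(binary_quantifier=HDy(LogisticRegression())).fit(training)
>>> prevalence = ova.quantify(X_test)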

      -property classes_
      +property classes_
      -fit(data: LabelledCollection, fit_classifier=True)[source]
      +fit(data: LabelledCollection, fit_classifier=True)[source]

      Trains a quantifier.

      Parameters:
      @@ -2151,7 +2285,7 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
      -quantify(instances)[source]
      +quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -2167,15 +2301,15 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
      -quapy.method.base.newOneVsAll(binary_quantifier, n_jobs=None)[source]
      +quapy.method.base.newOneVsAll(binary_quantifier, n_jobs=None)[source]
      -

      quapy.method.meta module

      +

      quapy.method.meta module

      -quapy.method.meta.EACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]
      +quapy.method.meta.EACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]

      Implements an ensemble of quapy.method.aggregative.ACC quantifiers, as used by Pérez-Gállego et al., 2019.

      Equivalent to:

      @@ -2202,7 +2336,7 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
      -quapy.method.meta.ECC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]
      +quapy.method.meta.ECC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]

      Implements an ensemble of quapy.method.aggregative.CC quantifiers, as used by Pérez-Gállego et al., 2019.

      Equivalent to:

      @@ -2229,7 +2363,7 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
      -quapy.method.meta.EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]
      +quapy.method.meta.EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]

      Implements an ensemble of quapy.method.aggregative.EMQ quantifiers.

      Equivalent to:

      >>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)
      @@ -2255,7 +2389,7 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
       
       
      -quapy.method.meta.EHDy(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]
      +quapy.method.meta.EHDy(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]

      Implements an ensemble of quapy.method.aggregative.HDy quantifiers, as used by Pérez-Gállego et al., 2019.

      Equivalent to:

      @@ -2282,7 +2416,7 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
      -quapy.method.meta.EPACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]
      +quapy.method.meta.EPACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs)[source]

      Implements an ensemble of quapy.method.aggregative.PACC quantifiers.

      Equivalent to:

      >>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)
      @@ -2308,11 +2442,11 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
       
       
      -class quapy.method.meta.Ensemble(quantifier: BaseQuantifier, size=50, red_size=25, min_pos=5, policy='ave', max_sample_size=None, val_split: LabelledCollection | float | None = None, n_jobs=None, verbose=False)[source]
      +class quapy.method.meta.Ensemble(quantifier: BaseQuantifier, size=50, red_size=25, min_pos=5, policy='ave', max_sample_size=None, val_split: Optional[Union[LabelledCollection, float]] = None, n_jobs=None, verbose=False)[source]

      Bases: BaseQuantifier

      -VALID_POLICIES = {'ave', 'ds', 'mae', 'mkld', 'mnae', 'mnkld', 'mnrae', 'mrae', 'mse', 'ptr'}
      +VALID_POLICIES = {'ave', 'ds', 'mae', 'mkld', 'mnae', 'mnkld', 'mnrae', 'mrae', 'mse', 'ptr'}

      Implementation of the Ensemble methods for quantification described by Pérez-Gállego et al., 2017 and @@ -2357,7 +2491,7 @@ validation split, or a
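A minimal usage sketch with a reduced ensemble size (training and X_test are placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.method.meta import Ensemble
>>> from quapy.method.aggregative import PACC
>>> ens = Ensemble(quantifier=PACC(LogisticRegression()), size=30, policy='ave').fit(training)
>>> prevalence = ens.quantify(X_test)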

      -property aggregative
      +property aggregative

      Indicates that the quantifier is not aggregative.

      Returns:
      @@ -2368,7 +2502,7 @@ validation split, or a
      -fit(data: LabelledCollection, val_split: LabelledCollection | float | None = None)[source]
      +fit(data: LabelledCollection, val_split: Optional[Union[LabelledCollection, float]] = None)[source]

      Trains a quantifier.

      Parameters:
      @@ -2382,7 +2516,7 @@ validation split, or a
      -get_params(deep=True)[source]
      +get_params(deep=True)[source]

      This function should not be used within quapy.model_selection.GridSearchQ (is here for compatibility with the abstract class). Instead, use Ensemble(GridSearchQ(q),…), with q a Quantifier (recommended), or @@ -2400,7 +2534,7 @@ classification (not recommended).

      -property probabilistic
      +property probabilistic

      Indicates that the quantifier is not probabilistic.

      Returns:
      @@ -2411,7 +2545,7 @@ classification (not recommended).

      -quantify(instances)[source]
      +quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -2425,7 +2559,7 @@ classification (not recommended).

      -set_params(**parameters)[source]
      +set_params(**parameters)[source]

      This function should not be used within quapy.model_selection.GridSearchQ (is here for compatibility with the abstract class). Instead, use Ensemble(GridSearchQ(q),…), with q a Quantifier (recommended), or @@ -2445,7 +2579,7 @@ classification (not recommended).

      -class quapy.method.meta.MedianEstimator(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]
      +class quapy.method.meta.MedianEstimator(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]

      Bases: BinaryQuantifier

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimates returned by differently (hyper)parameterized base quantifiers. @@ -2463,7 +2597,7 @@ i.e., in cases of binary quantification.

      -fit(training: LabelledCollection)[source]
      +fit(training: LabelledCollection)[source]

      Trains a quantifier.

      Parameters:
      @@ -2477,7 +2611,7 @@ i.e., in cases of binary quantification.

      -get_params(deep=True)[source]
      +get_params(deep=True)[source]

      Get parameters for this estimator.

      Parameters:
      @@ -2495,7 +2629,7 @@ contained subobjects that are estimators.

      -quantify(instances)[source]
      +quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -2509,7 +2643,7 @@ contained subobjects that are estimators.

      -set_params(**params)[source]
      +set_params(**params)[source]

      Set the parameters of this estimator.

      The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have @@ -2532,7 +2666,7 @@ possible to update each component of a nested object.

      -class quapy.method.meta.MedianEstimator2(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]
      +class quapy.method.meta.MedianEstimator2(base_quantifier: BinaryQuantifier, param_grid: dict, random_state=None, n_jobs=None)[source]

      Bases: BinaryQuantifier

This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the estimates returned by differently (hyper)parameterized base quantifiers.
@@ -2550,7 +2684,7 @@ i.e., in cases of binary quantification.

      -fit(training: LabelledCollection)[source]
      +fit(training: LabelledCollection)[source]

      Trains a quantifier.

      Parameters:
      @@ -2564,7 +2698,7 @@ i.e., in cases of binary quantification.

      -get_params(deep=True)[source]
      +get_params(deep=True)[source]

      Get parameters for this estimator.

      Parameters:
      @@ -2582,7 +2716,7 @@ contained subobjects that are estimators.

      -quantify(instances)[source]
      +quantify(instances)[source]

      Generate class prevalence estimates for the sample’s instances

      Parameters:
      @@ -2596,7 +2730,7 @@ contained subobjects that are estimators.

      -set_params(**params)[source]
      +set_params(**params)[source]

      Set the parameters of this estimator.

The method works on simple estimators as well as on nested objects (such as Pipeline). The latter have
@@ -2619,7 +2753,7 @@ possible to update each component of a nested object.

      -quapy.method.meta.ensembleFactory(classifier, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict | None = None, **kwargs)[source]
      +quapy.method.meta.ensembleFactory(classifier, base_quantifier_class, param_grid=None, optim=None, param_model_sel: Optional[dict] = None, **kwargs)[source]

Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model selection for quantification) for a given evaluation metric using quapy.model_selection.GridSearchQ. If the evaluation metric is classification-oriented
@@ -2669,7 +2803,7 @@ in terms of this error.
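A sketch of how ensembleFactory might be invoked (not part of the patch). The argument names come from the signature above; the contents of param_model_sel and the extra `size`/`policy` keyword arguments are assumptions:

```
import numpy as np
from sklearn.linear_model import LogisticRegression

from quapy.method.aggregative import PACC
from quapy.method.meta import ensembleFactory

# Each ensemble member is model-selected via GridSearchQ for the 'mae' metric.
ensemble = ensembleFactory(
    classifier=LogisticRegression(),
    base_quantifier_class=PACC,
    param_grid={'classifier__C': np.logspace(-2, 2, 5)},
    optim='mae',
    param_model_sel={'sample_size': 100, 'protocol': 'app'},  # assumed keys
    size=30, policy='ptr',  # assumed to be forwarded to Ensemble via **kwargs
)
```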

      -quapy.method.meta.get_probability_distribution(posterior_probabilities, bins=8)[source]
      +quapy.method.meta.get_probability_distribution(posterior_probabilities, bins=8)[source]

      Gets a histogram out of the posterior probabilities (only for the binary case).

      Parameters:
      @@ -2686,10 +2820,10 @@ in terms of this error.
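For reference, a minimal sketch (not part of the patch) of what get_probability_distribution consumes and produces; the fitted classifier and the instances X are hypothetical:

```
from quapy.method.meta import get_probability_distribution

# posteriors: array of shape (n_instances, 2) from a binary probabilistic
# classifier; the function returns a histogram with `bins` entries built
# from the posterior probabilities.
posteriors = classifier.predict_proba(X)  # hypothetical fitted classifier
histogram = get_probability_distribution(posteriors, bins=8)
```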

-quapy.method.non_aggregative module
+quapy.method.non_aggregative module

      -class quapy.method.non_aggregative.DMx(nbins=8, divergence: str | Callable = 'HD', cdf=False, search='optim_minimize', n_jobs=None)[source]
      +class quapy.method.non_aggregative.DMx(nbins=8, divergence: Union[str, Callable] = 'HD', cdf=False, search='optim_minimize', n_jobs=None)[source]

      Bases: BaseQuantifier

Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates. This implementation takes the number of bins, the divergence, and whether to work on the CDF as hyperparameters.

      @@ -2707,7 +2841,7 @@ Distance)

      -classmethod HDx(n_jobs=None)[source]
      +classmethod HDx(n_jobs=None)[source]

Hellinger Distance x (HDx). HDx is a method for training binary quantifiers that models quantification as the problem of minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized
@@ -2729,7 +2863,7 @@ González-Castro, Alaiz-Rodríguez, Alegre (2013)

      -fit(data: LabelledCollection)[source]
      +fit(data: LabelledCollection)[source]

Generates the validation distributions out of the training data (covariates). The validation distributions have shape (n, nfeats, nbins), with n the number of classes, nfeats the number of features, and nbins the number of bins.
@@ -2745,7 +2879,7 @@ training data labelled with class i, and dij[k] is the

      -quantify(instances)[source]
      +quantify(instances)[source]

Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution (the mixture) that best matches the test distribution, in terms of the divergence measure of choice. The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice)
@@ -2764,13 +2898,13 @@ between all feature-specific discrete distributions.
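A minimal usage sketch for DMx (not part of the patch), using only the hyperparameters and methods documented above; X_train, y_train and X_test are hypothetical covariate data:

```
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import DMx

# Hyperparameters as documented in the signature above.
dmx = DMx(nbins=8, divergence='HD', cdf=False)
dmx.fit(LabelledCollection(X_train, y_train))  # hypothetical covariate data
estim_prevalence = dmx.quantify(X_test)

# The binary-only HDx variant is available via the classmethod shown above.
hdx = DMx.HDx()
```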

      -quapy.method.non_aggregative.DistributionMatchingX
      +quapy.method.non_aggregative.DistributionMatchingX

      alias of DMx

      -class quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation[source]
      +class quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation[source]

      Bases: BaseQuantifier

The Maximum Likelihood Prevalence Estimation (MLPE) method is a lazy method that assumes there is no prior probability shift between training and test instances (put another way, that the i.i.d. assumption holds).
@@ -2779,7 +2913,7 @@ itself) the class prevalence seen during training. This method is considered to be a lower-bound quantifier that any quantification method should beat.

      -fit(data: LabelledCollection)[source]
      +fit(data: LabelledCollection)[source]

      Computes the training prevalence and stores it.

      Parameters:
      @@ -2793,7 +2927,7 @@ any quantification method should beat.

      -quantify(instances)[source]
      +quantify(instances)[source]

Ignores the input instances and returns, as the class prevalence estimates, the training prevalence.

      Parameters:
      @@ -2807,9 +2941,48 @@ any quantification method should beat.
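Since MLPE is the usual baseline, a two-line sketch (not part of the patch) may help; X_train, y_train and X_test are hypothetical:

```
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation

mlpe = MaximumLikelihoodPrevalenceEstimation()
mlpe.fit(LabelledCollection(X_train, y_train))  # hypothetical data
# The test instances are ignored; the training prevalence is returned.
estim_prevalence = mlpe.quantify(X_test)
```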

+class quapy.method.non_aggregative.ReadMe(bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs)[source]

Bases: BaseQuantifier

+fit(data: LabelledCollection)[source]

Trains a quantifier.

Parameters:

data – a quapy.data.base.LabelledCollection consisting of the training data

Returns:

self

+quantify(instances)[source]

Generate class prevalence estimates for the sample’s instances

Parameters:

instances – array-like

Returns:

np.ndarray of shape (n_classes,) with class prevalence estimates.

+std_constrained_linear_ls(X, class_cond_X: dict)[source]
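The newly added ReadMe class is otherwise undocumented in this diff, so the following sketch (not part of the patch) should be read as an assumption: the constructor defaults come from the signature above, and the idea that `instances` are raw text documents (vectorized internally via **vectorizer_kwargs) is inferred, not stated:

```
from quapy.data import LabelledCollection
from quapy.method.non_aggregative import ReadMe

# Constructor defaults as in the signature above; raw-text input is assumed.
readme = ReadMe(bootstrap_trials=100, bagging_trials=100)
readme.fit(LabelledCollection(train_docs, train_labels))  # hypothetical corpus
estim_prevalence = readme.quantify(test_docs)
```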
-Module contents
+Module contents

diff --git a/docs/build/html/search.html b/docs/build/html/search.html
index ba1b54d..bf88dae 100644
--- a/docs/build/html/search.html
+++ b/docs/build/html/search.html
@@ -1,11 +1,11 @@
-    Search — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation
+    Search — QuaPy: A Python-based open-source framework for quantification 0.1.9 documentation
@@ -13,11 +13,12 @@
(markup-only changes to script and stylesheet tags; no visible text changed)

diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index a2b126d..7f2792c 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
(regenerated Sphinx search index: a single minified line of JavaScript, omitted)
[[3, "quapy.protocol.AbstractStochasticSeededProtocol"]], "acc_error() (in module quapy.error)": [[3, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[3, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[3, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[3, "quapy.error.ae"]], "app (class in quapy.protocol)": [[3, "quapy.protocol.APP"]], "argmin_prevalence() (in module quapy.functional)": [[3, "quapy.functional.argmin_prevalence"]], "artificialprevalenceprotocol (in module quapy.protocol)": [[3, "quapy.protocol.ArtificialPrevalenceProtocol"]], "as_binary_prevalence() (in module quapy.functional)": [[3, "quapy.functional.as_binary_prevalence"]], "best_model() (quapy.model_selection.gridsearchq method)": [[3, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[3, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[3, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[3, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[3, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[3, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[3, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "configstatus (class in quapy.model_selection)": [[3, "quapy.model_selection.ConfigStatus"]], "create_if_not_exist() (in module quapy.util)": [[3, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[3, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[3, "quapy.model_selection.cross_val_predict"]], "domainmixer (class in quapy.protocol)": [[3, "quapy.protocol.DomainMixer"]], "download_file() (in module quapy.util)": [[3, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[3, "quapy.util.download_file_if_not_exists"]], "earlystop (class in quapy.util)": [[3, "quapy.util.EarlyStop"]], "error (quapy.model_selection.status attribute)": [[3, "quapy.model_selection.Status.ERROR"]], "error_by_drift() (in module quapy.plot)": [[3, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[3, "quapy.evaluation.evaluate"]], "evaluate_on_samples() (in module quapy.evaluation)": [[3, "quapy.evaluation.evaluate_on_samples"]], "evaluation_report() (in module quapy.evaluation)": [[3, "quapy.evaluation.evaluation_report"]], "expand_grid() (in module quapy.model_selection)": [[3, "quapy.model_selection.expand_grid"]], "f1_error() (in module quapy.error)": [[3, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[3, "quapy.error.f1e"]], "failed() (quapy.model_selection.configstatus method)": [[3, "quapy.model_selection.ConfigStatus.failed"]], "fit() (quapy.model_selection.gridsearchq method)": [[3, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[3, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[3, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_divergence() (in module quapy.functional)": [[3, "quapy.functional.get_divergence"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[3, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module 
quapy.functional)": [[3, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[3, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[3, "quapy.util.get_quapy_home"]], "gridsearchq (class in quapy.model_selection)": [[3, "quapy.model_selection.GridSearchQ"]], "group_params() (in module quapy.model_selection)": [[3, "quapy.model_selection.group_params"]], "hellingerdistance() (in module quapy.functional)": [[3, "quapy.functional.HellingerDistance"]], "invalid (quapy.model_selection.status attribute)": [[3, "quapy.model_selection.Status.INVALID"]], "iterateprotocol (class in quapy.protocol)": [[3, "quapy.protocol.IterateProtocol"]], "kld() (in module quapy.error)": [[3, "quapy.error.kld"]], "linear_search() (in module quapy.functional)": [[3, "quapy.functional.linear_search"]], "mae() (in module quapy.error)": [[3, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[3, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[3, "quapy.error.mean_absolute_error"]], "mean_normalized_absolute_error() (in module quapy.error)": [[3, "quapy.error.mean_normalized_absolute_error"]], "mean_normalized_relative_absolute_error() (in module quapy.error)": [[3, "quapy.error.mean_normalized_relative_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[3, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[3, "quapy.error.mkld"]], "mnae() (in module quapy.error)": [[3, "quapy.error.mnae"]], "mnkld() (in module quapy.error)": [[3, "quapy.error.mnkld"]], "mnrae() (in module quapy.error)": [[3, "quapy.error.mnrae"]], "module": [[0, "module-quapy"], [3, "module-quapy"], [3, "module-quapy.error"], [3, "module-quapy.evaluation"], [3, "module-quapy.functional"], [3, "module-quapy.model_selection"], [3, "module-quapy.plot"], [3, "module-quapy.protocol"], [3, "module-quapy.util"]], "mrae() (in module quapy.error)": [[3, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[3, "quapy.error.mse"]], "nae() (in module quapy.error)": [[3, "quapy.error.nae"]], "naturalprevalenceprotocol (in module quapy.protocol)": [[3, "quapy.protocol.NaturalPrevalenceProtocol"]], "nkld() (in module quapy.error)": [[3, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[3, "quapy.functional.normalize_prevalence"]], "normalized_absolute_error() (in module quapy.error)": [[3, "quapy.error.normalized_absolute_error"]], "normalized_relative_absolute_error() (in module quapy.error)": [[3, "quapy.error.normalized_relative_absolute_error"]], "npp (class in quapy.protocol)": [[3, "quapy.protocol.NPP"]], "nrae() (in module quapy.error)": [[3, "quapy.error.nrae"]], "num_prevalence_combinations() (in module quapy.functional)": [[3, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[3, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[3, "quapy.protocol.OnLabelledCollectionProtocol"]], "optim_minimize() (in module quapy.functional)": [[3, "quapy.functional.optim_minimize"]], "parallel() (in module quapy.util)": [[3, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[3, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[3, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": 
[[3, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[3, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[3, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[3, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[3, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[0, "module-quapy"], [3, "module-quapy"]], "quapy.error": [[3, "module-quapy.error"]], "quapy.evaluation": [[3, "module-quapy.evaluation"]], "quapy.functional": [[3, "module-quapy.functional"]], "quapy.model_selection": [[3, "module-quapy.model_selection"]], "quapy.plot": [[3, "module-quapy.plot"]], "quapy.protocol": [[3, "module-quapy.protocol"]], "quapy.util": [[3, "module-quapy.util"]], "rae() (in module quapy.error)": [[3, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[3, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[3, "quapy.error.relative_absolute_error"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[3, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[3, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.app method)": [[3, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.domainmixer method)": [[3, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[3, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.upp method)": [[3, "quapy.protocol.UPP.sample"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[3, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.app method)": [[3, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[3, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[3, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.upp method)": [[3, "quapy.protocol.UPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[3, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[3, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[3, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[3, "quapy.error.smooth"]], "status (class in quapy.model_selection)": [[3, "quapy.model_selection.Status"]], "strprev() (in module quapy.functional)": [[3, "quapy.functional.strprev"]], "success (quapy.model_selection.status attribute)": [[3, "quapy.model_selection.Status.SUCCESS"]], "success() (quapy.model_selection.configstatus method)": [[3, "quapy.model_selection.ConfigStatus.success"]], "temp_seed() (in module quapy.util)": [[3, "quapy.util.temp_seed"]], "timeout (quapy.model_selection.status attribute)": [[3, "quapy.model_selection.Status.TIMEOUT"]], "timeout() (in module quapy.util)": [[3, "quapy.util.timeout"]], "topsoedistance() (in module quapy.functional)": [[3, "quapy.functional.TopsoeDistance"]], "total() (quapy.protocol.abstractprotocol method)": [[3, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.app method)": [[3, "quapy.protocol.APP.total"]], "total() 
(quapy.protocol.domainmixer method)": [[3, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.iterateprotocol method)": [[3, "quapy.protocol.IterateProtocol.total"]], "total() (quapy.protocol.npp method)": [[3, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.upp method)": [[3, "quapy.protocol.UPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[3, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[3, "quapy.functional.uniform_simplex_sampling"]], "uniformprevalenceprotocol (in module quapy.protocol)": [[3, "quapy.protocol.UniformPrevalenceProtocol"]], "upp (class in quapy.protocol)": [[3, "quapy.protocol.UPP"]]}, "objects": {"": [[3, 0, 0, "-", "quapy"]], "quapy": [[4, 0, 0, "-", "classification"], [5, 0, 0, "-", "data"], [3, 0, 0, "-", "error"], [3, 0, 0, "-", "evaluation"], [3, 0, 0, "-", "functional"], [6, 0, 0, "-", "method"], [3, 0, 0, "-", "model_selection"], [3, 0, 0, "-", "plot"], [3, 0, 0, "-", "protocol"], [3, 0, 0, "-", "util"]], "quapy.classification": [[4, 0, 0, "-", "calibration"], [4, 0, 0, "-", "methods"], [4, 0, 0, "-", "neural"], [4, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[4, 1, 1, "", "BCTSCalibration"], [4, 1, 1, "", "NBVSCalibration"], [4, 1, 1, "", "RecalibratedProbabilisticClassifier"], [4, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [4, 1, 1, "", "TSCalibration"], [4, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[4, 2, 1, "", "classes_"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "fit_cv"], [4, 3, 1, "", "fit_tr_val"], [4, 3, 1, "", "predict"], [4, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[4, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[4, 3, 1, "", "fit"], [4, 3, 1, "", "get_params"], [4, 3, 1, "", "predict"], [4, 3, 1, "", "predict_proba"], [4, 3, 1, "", "set_params"], [4, 3, 1, "", "transform"]], "quapy.classification.neural": [[4, 1, 1, "", "CNNnet"], [4, 1, 1, "", "LSTMnet"], [4, 1, 1, "", "NeuralClassifierTrainer"], [4, 1, 1, "", "TextClassifierNet"], [4, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[4, 3, 1, "", "document_embedding"], [4, 3, 1, "", "get_params"], [4, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[4, 3, 1, "", "document_embedding"], [4, 3, 1, "", "get_params"], [4, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[4, 2, 1, "", "device"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "get_params"], [4, 3, 1, "", "predict"], [4, 3, 1, "", "predict_proba"], [4, 3, 1, "", "reset_net_params"], [4, 3, 1, "", "set_params"], [4, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[4, 3, 1, "", "dimensions"], [4, 3, 1, "", "document_embedding"], [4, 3, 1, "", "forward"], [4, 3, 1, "", "get_params"], [4, 3, 1, "", "predict_proba"], [4, 2, 1, "", "vocabulary_size"], [4, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[4, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[4, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[4, 3, 1, "", "decision_function"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "predict"], [4, 4, 1, "", "valid_losses"]], "quapy.data": [[5, 0, 0, "-", "base"], [5, 0, 0, "-", "datasets"], [5, 0, 0, "-", "preprocessing"], [5, 0, 0, "-", "reader"]], "quapy.data.base": [[5, 1, 1, "", "Dataset"], [5, 1, 1, "", "LabelledCollection"]], 
"quapy.data.base.Dataset": [[5, 3, 1, "", "SplitStratified"], [5, 2, 1, "", "binary"], [5, 2, 1, "", "classes_"], [5, 3, 1, "", "kFCV"], [5, 3, 1, "", "load"], [5, 2, 1, "", "n_classes"], [5, 3, 1, "", "reduce"], [5, 3, 1, "", "stats"], [5, 2, 1, "", "train_test"], [5, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[5, 2, 1, "", "X"], [5, 2, 1, "", "Xp"], [5, 2, 1, "", "Xy"], [5, 2, 1, "", "binary"], [5, 3, 1, "", "counts"], [5, 3, 1, "", "join"], [5, 3, 1, "", "kFCV"], [5, 3, 1, "", "load"], [5, 2, 1, "", "n_classes"], [5, 2, 1, "", "p"], [5, 3, 1, "", "prevalence"], [5, 3, 1, "", "sampling"], [5, 3, 1, "", "sampling_from_index"], [5, 3, 1, "", "sampling_index"], [5, 3, 1, "", "split_random"], [5, 3, 1, "", "split_stratified"], [5, 3, 1, "", "stats"], [5, 3, 1, "", "uniform_sampling"], [5, 3, 1, "", "uniform_sampling_index"], [5, 2, 1, "", "y"]], "quapy.data.datasets": [[5, 5, 1, "", "fetch_IFCB"], [5, 5, 1, "", "fetch_UCIBinaryDataset"], [5, 5, 1, "", "fetch_UCIBinaryLabelledCollection"], [5, 5, 1, "", "fetch_UCIMulticlassDataset"], [5, 5, 1, "", "fetch_UCIMulticlassLabelledCollection"], [5, 5, 1, "", "fetch_lequa2022"], [5, 5, 1, "", "fetch_reviews"], [5, 5, 1, "", "fetch_twitter"], [5, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[5, 1, 1, "", "IndexTransformer"], [5, 5, 1, "", "index"], [5, 5, 1, "", "reduce_columns"], [5, 5, 1, "", "standardize"], [5, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[5, 3, 1, "", "add_word"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "fit_transform"], [5, 3, 1, "", "transform"], [5, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[5, 5, 1, "", "binarize"], [5, 5, 1, "", "from_csv"], [5, 5, 1, "", "from_sparse"], [5, 5, 1, "", "from_text"], [5, 5, 1, "", "reindex_labels"]], "quapy.error": [[3, 5, 1, "", "absolute_error"], [3, 5, 1, "", "acc_error"], [3, 5, 1, "", "acce"], [3, 5, 1, "", "ae"], [3, 5, 1, "", "f1_error"], [3, 5, 1, "", "f1e"], [3, 5, 1, "", "from_name"], [3, 5, 1, "", "kld"], [3, 5, 1, "", "mae"], [3, 5, 1, "", "mean_absolute_error"], [3, 5, 1, "", "mean_normalized_absolute_error"], [3, 5, 1, "", "mean_normalized_relative_absolute_error"], [3, 5, 1, "", "mean_relative_absolute_error"], [3, 5, 1, "", "mkld"], [3, 5, 1, "", "mnae"], [3, 5, 1, "", "mnkld"], [3, 5, 1, "", "mnrae"], [3, 5, 1, "", "mrae"], [3, 5, 1, "", "mse"], [3, 5, 1, "", "nae"], [3, 5, 1, "", "nkld"], [3, 5, 1, "", "normalized_absolute_error"], [3, 5, 1, "", "normalized_relative_absolute_error"], [3, 5, 1, "", "nrae"], [3, 5, 1, "", "rae"], [3, 5, 1, "", "relative_absolute_error"], [3, 5, 1, "", "se"], [3, 5, 1, "", "smooth"]], "quapy.evaluation": [[3, 5, 1, "", "evaluate"], [3, 5, 1, "", "evaluate_on_samples"], [3, 5, 1, "", "evaluation_report"], [3, 5, 1, "", "prediction"]], "quapy.functional": [[3, 5, 1, "", "HellingerDistance"], [3, 5, 1, "", "TopsoeDistance"], [3, 5, 1, "", "adjusted_quantification"], [3, 5, 1, "", "argmin_prevalence"], [3, 5, 1, "", "as_binary_prevalence"], [3, 5, 1, "", "check_prevalence_vector"], [3, 5, 1, "", "get_divergence"], [3, 5, 1, "", "get_nprevpoints_approximation"], [3, 5, 1, "", "linear_search"], [3, 5, 1, "", "normalize_prevalence"], [3, 5, 1, "", "num_prevalence_combinations"], [3, 5, 1, "", "optim_minimize"], [3, 5, 1, "", "prevalence_from_labels"], [3, 5, 1, "", "prevalence_from_probabilities"], [3, 5, 1, "", "prevalence_linspace"], [3, 5, 1, "", "strprev"], [3, 5, 1, "", "uniform_prevalence_sampling"], [3, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[6, 0, 0, "-", 
"_kdey"], [6, 0, 0, "-", "_neural"], [6, 0, 0, "-", "_threshold_optim"], [6, 0, 0, "-", "aggregative"], [6, 0, 0, "-", "base"], [6, 0, 0, "-", "meta"], [6, 0, 0, "-", "non_aggregative"]], "quapy.method._kdey": [[6, 1, 1, "", "KDEBase"], [6, 1, 1, "", "KDEyCS"], [6, 1, 1, "", "KDEyHD"], [6, 1, 1, "", "KDEyML"]], "quapy.method._kdey.KDEBase": [[6, 4, 1, "", "BANDWIDTH_METHOD"], [6, 3, 1, "", "get_kde_function"], [6, 3, 1, "", "get_mixture_components"], [6, 3, 1, "", "pdf"]], "quapy.method._kdey.KDEyCS": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "gram_matrix_mix_sum"]], "quapy.method._kdey.KDEyHD": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method._kdey.KDEyML": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method._neural": [[6, 1, 1, "", "QuaNetModule"], [6, 1, 1, "", "QuaNetTrainer"], [6, 5, 1, "", "mae_loss"]], "quapy.method._neural.QuaNetModule": [[6, 2, 1, "", "device"], [6, 3, 1, "", "forward"]], "quapy.method._neural.QuaNetTrainer": [[6, 2, 1, "", "classes_"], [6, 3, 1, "", "clean_checkpoint"], [6, 3, 1, "", "clean_checkpoint_dir"], [6, 3, 1, "", "fit"], [6, 3, 1, "", "get_params"], [6, 3, 1, "", "quantify"], [6, 3, 1, "", "set_params"]], "quapy.method._threshold_optim": [[6, 1, 1, "", "MAX"], [6, 1, 1, "", "MS"], [6, 1, 1, "", "MS2"], [6, 1, 1, "", "T50"], [6, 1, 1, "", "ThresholdOptimization"], [6, 1, 1, "", "X"]], "quapy.method._threshold_optim.MAX": [[6, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS2": [[6, 3, 1, "", "discard"]], "quapy.method._threshold_optim.T50": [[6, 3, 1, "", "condition"]], "quapy.method._threshold_optim.ThresholdOptimization": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregate_with_threshold"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "condition"], [6, 3, 1, "", "discard"]], "quapy.method._threshold_optim.X": [[6, 3, 1, "", "condition"]], "quapy.method.aggregative": [[6, 1, 1, "", "ACC"], [6, 4, 1, "", "AdjustedClassifyAndCount"], [6, 1, 1, "", "AggregativeCrispQuantifier"], [6, 1, 1, "", "AggregativeMedianEstimator"], [6, 1, 1, "", "AggregativeQuantifier"], [6, 1, 1, "", "AggregativeSoftQuantifier"], [6, 1, 1, "", "BinaryAggregativeQuantifier"], [6, 1, 1, "", "CC"], [6, 4, 1, "", "ClassifyAndCount"], [6, 1, 1, "", "DMy"], [6, 4, 1, "", "DistributionMatchingY"], [6, 1, 1, "", "DyS"], [6, 1, 1, "", "EMQ"], [6, 4, 1, "", "ExpectationMaximizationQuantifier"], [6, 1, 1, "", "HDy"], [6, 4, 1, "", "HellingerDistanceY"], [6, 1, 1, "", "OneVsAllAggregative"], [6, 1, 1, "", "PACC"], [6, 1, 1, "", "PCC"], [6, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [6, 4, 1, "", "ProbabilisticClassifyAndCount"], [6, 4, 1, "", "SLD"], [6, 1, 1, "", "SMM"], [6, 5, 1, "", "newELM"], [6, 5, 1, "", "newSVMAE"], [6, 5, 1, "", "newSVMKLD"], [6, 5, 1, "", "newSVMQ"], [6, 5, 1, "", "newSVMRAE"]], "quapy.method.aggregative.ACC": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "getPteCondEstim"], [6, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeMedianEstimator": [[6, 3, 1, "", "fit"], [6, 3, 1, "", "get_params"], [6, 3, 1, "", "quantify"], [6, 3, 1, "", "set_params"]], "quapy.method.aggregative.AggregativeQuantifier": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 2, 1, "", "classes_"], [6, 2, 1, "", "classifier"], [6, 3, 1, "", "classifier_fit_predict"], [6, 3, 1, "", 
"classify"], [6, 3, 1, "", "fit"], [6, 3, 1, "", "quantify"], [6, 2, 1, "", "val_split"], [6, 4, 1, "", "val_split_"]], "quapy.method.aggregative.BinaryAggregativeQuantifier": [[6, 3, 1, "", "fit"], [6, 2, 1, "", "neg_label"], [6, 2, 1, "", "pos_label"]], "quapy.method.aggregative.CC": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DMy": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DyS": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.EMQ": [[6, 3, 1, "", "EM"], [6, 3, 1, "", "EMQ_BCTS"], [6, 4, 1, "", "EPSILON"], [6, 4, 1, "", "MAX_ITER"], [6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "classify"], [6, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"], [6, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.SMM": [[6, 3, 1, "", "aggregate"], [6, 3, 1, "", "aggregation_fit"]], "quapy.method.base": [[6, 1, 1, "", "BaseQuantifier"], [6, 1, 1, "", "BinaryQuantifier"], [6, 1, 1, "", "OneVsAll"], [6, 1, 1, "", "OneVsAllGeneric"], [6, 5, 1, "", "newOneVsAll"]], "quapy.method.base.BaseQuantifier": [[6, 3, 1, "", "fit"], [6, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[6, 2, 1, "", "classes_"], [6, 3, 1, "", "fit"], [6, 3, 1, "", "quantify"]], "quapy.method.meta": [[6, 5, 1, "", "EACC"], [6, 5, 1, "", "ECC"], [6, 5, 1, "", "EEMQ"], [6, 5, 1, "", "EHDy"], [6, 5, 1, "", "EPACC"], [6, 1, 1, "", "Ensemble"], [6, 1, 1, "", "MedianEstimator"], [6, 1, 1, "", "MedianEstimator2"], [6, 5, 1, "", "ensembleFactory"], [6, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[6, 4, 1, "", "VALID_POLICIES"], [6, 2, 1, "", "aggregative"], [6, 3, 1, "", "fit"], [6, 3, 1, "", "get_params"], [6, 2, 1, "", "probabilistic"], [6, 3, 1, "", "quantify"], [6, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator": [[6, 3, 1, "", "fit"], [6, 3, 1, "", "get_params"], [6, 3, 1, "", "quantify"], [6, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator2": [[6, 3, 1, "", "fit"], [6, 3, 1, "", "get_params"], [6, 3, 1, "", "quantify"], [6, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[6, 1, 1, "", "DMx"], [6, 4, 1, "", "DistributionMatchingX"], [6, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.DMx": [[6, 3, 1, "", "HDx"], [6, 3, 1, "", "fit"], [6, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[6, 3, 1, "", "fit"], [6, 3, 1, "", "quantify"]], "quapy.model_selection": [[3, 1, 1, "", "ConfigStatus"], [3, 1, 1, "", "GridSearchQ"], [3, 1, 1, "", "Status"], [3, 5, 1, "", "cross_val_predict"], [3, 5, 1, "", "expand_grid"], [3, 5, 1, "", "group_params"]], "quapy.model_selection.ConfigStatus": [[3, 3, 1, "", "failed"], [3, 3, 1, "", "success"]], "quapy.model_selection.GridSearchQ": [[3, 3, 1, "", "best_model"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "quantify"], [3, 3, 1, "", "set_params"]], "quapy.model_selection.Status": [[3, 4, 1, "", "ERROR"], [3, 4, 1, "", "INVALID"], [3, 4, 1, "", "SUCCESS"], [3, 4, 1, "", "TIMEOUT"]], 
"quapy.plot": [[3, 5, 1, "", "binary_bias_bins"], [3, 5, 1, "", "binary_bias_global"], [3, 5, 1, "", "binary_diagonal"], [3, 5, 1, "", "brokenbar_supremacy_by_drift"], [3, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[3, 1, 1, "", "APP"], [3, 1, 1, "", "AbstractProtocol"], [3, 1, 1, "", "AbstractStochasticSeededProtocol"], [3, 4, 1, "", "ArtificialPrevalenceProtocol"], [3, 1, 1, "", "DomainMixer"], [3, 1, 1, "", "IterateProtocol"], [3, 1, 1, "", "NPP"], [3, 4, 1, "", "NaturalPrevalenceProtocol"], [3, 1, 1, "", "OnLabelledCollectionProtocol"], [3, 1, 1, "", "UPP"], [3, 4, 1, "", "UniformPrevalenceProtocol"]], "quapy.protocol.APP": [[3, 3, 1, "", "prevalence_grid"], [3, 3, 1, "", "sample"], [3, 3, 1, "", "samples_parameters"], [3, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[3, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[3, 3, 1, "", "collator"], [3, 2, 1, "", "random_state"], [3, 3, 1, "", "sample"], [3, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[3, 3, 1, "", "sample"], [3, 3, 1, "", "samples_parameters"], [3, 3, 1, "", "total"]], "quapy.protocol.IterateProtocol": [[3, 3, 1, "", "total"]], "quapy.protocol.NPP": [[3, 3, 1, "", "sample"], [3, 3, 1, "", "samples_parameters"], [3, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[3, 4, 1, "", "RETURN_TYPES"], [3, 3, 1, "", "get_collator"], [3, 3, 1, "", "get_labelled_collection"], [3, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.UPP": [[3, 3, 1, "", "sample"], [3, 3, 1, "", "samples_parameters"], [3, 3, 1, "", "total"]], "quapy.util": [[3, 1, 1, "", "EarlyStop"], [3, 5, 1, "", "create_if_not_exist"], [3, 5, 1, "", "create_parent_dir"], [3, 5, 1, "", "download_file"], [3, 5, 1, "", "download_file_if_not_exists"], [3, 5, 1, "", "get_quapy_home"], [3, 5, 1, "", "map_parallel"], [3, 5, 1, "", "parallel"], [3, 5, 1, "", "pickled_resource"], [3, 5, 1, "", "save_text_file"], [3, 5, 1, "", "temp_seed"], [3, 5, 1, "", "timeout"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "terms": {"": [3, 4, 5, 6], "0": [3, 4, 5, 6], "0001": 6, "001": [4, 6], "005": 3, "01": [3, 4, 6], "05": [3, 6], "063": 5, "08": 3, "081": 5, "1": [3, 4, 5, 6], "10": [3, 4, 6], "100": [3, 4, 5, 6], "1000": [5, 6], "10000": [3, 6], "10036244": [], "1024": 6, "11": 3, "110": 6, "1145": 6, "12": 4, "13": 4, "15": [3, 5], "19": 5, "1e": [3, 4, 6], "2": [3, 5, 6], "20": [3, 6], "200": 4, "2006": 6, "2008": 6, "2013": 6, "2015": [4, 6], "2016": [5, 6], "2017": [5, 6], "2018": 5, "2019": [5, 6], "2020": 4, "2021": 6, "2022": [5, 6], "21": 3, "21591": 5, "22": [4, 5], "222": 4, "23": 4, "232": 4, "24": 4, "25": [3, 4, 6], "256": 4, "26": 4, "27": 4, "27th": 5, "28": 5, "286": 5, "2dx": 6, "2frac": 3, "2nd": 6, "2t": 3, "2tp": 3, "3": [3, 4, 5, 6], "30": [5, 6], "300": 4, "3219819": 6, "3220059": 6, "3227": 3, "33": 3, "34": [5, 6], "3821": 5, "4": [3, 5], "40": 6, "4403": 5, "45": 5, "5": [3, 4, 5, 6], "50": [3, 6], "500": 6, "512": [4, 6], "5fcvx2": 5, "6": 5, "64": [4, 6], "66": 6, "67": 3, "7": [3, 4, 6], "75": 3, "8": [5, 6], "86": 5, "87": 5, "9": 3, "90": 3, "919": 5, "937": 5, "95": 3, "99": 3, "A": [3, 4, 5, 6], "By": 3, "For": [3, 5, 
6], "If": [3, 5, 6], "In": [3, 4, 5, 6], "It": [3, 5], "No": [4, 6], "On": 6, "One": 6, "The": [3, 4, 5, 6], "These": [4, 6], "To": 5, "_": [3, 5, 6], "__": 6, "_f": 6, "_i": 6, "_ifcb": 5, "_kdei": 6, "_lequa2022": 5, "_neural": 6, "_threshold_optim": 6, "abc": 6, "about": [3, 5, 6], "abov": 3, "absolut": [3, 6], "absolute_error": [1, 2, 3], "abstent": 4, "abstract": [3, 4, 5, 6], "abstractprotocol": [1, 2, 3, 5], "abstractstochasticseededprotocol": [1, 2, 3], "acc": [1, 2, 3, 6], "acc_error": [1, 2, 3], "access": [5, 6], "accommod": 3, "accord": [3, 4, 5, 6], "accordingli": 6, "accuraci": [3, 6], "achiev": 6, "acm": [5, 6], "across": [3, 6], "act": 3, "actual": [5, 6], "ad": 3, "adapt": [3, 4], "add": [3, 5], "add_word": [1, 3, 5], "addit": 3, "adher": 3, "adjust": [3, 6], "adjusted_quantif": [1, 2, 3], "adjustedclassifyandcount": [1, 3, 6], "admit": 6, "adopt": 5, "advanc": 3, "advantag": 6, "ae": [1, 2, 3], "after": [3, 6], "afterward": [4, 6], "again": 3, "aggr_speedup": 3, "aggreg": [1, 2, 3], "aggregate_with_threshold": [1, 3, 6], "aggregation_fit": [1, 3, 6], "aggregative_method": [], "aggregativecrispquantifi": [1, 3, 6], "aggregativemedianestim": [1, 3, 6], "aggregativequantifi": [1, 3, 6], "aggregativesoftquantifi": [1, 3, 6], "aka": [5, 6], "akin": [3, 6], "al": [4, 5, 6], "alaiz": 6, "alegr": 6, "alexandari": [4, 6], "algorithm": [3, 5, 6], "alia": [3, 5, 6], "all": [3, 4, 5, 6], "alloc": [3, 4], "allow": [3, 4, 5, 6], "along": [3, 6], "alpha": 6, "alpha_i": 6, "alreadi": [3, 6], "also": [3, 4, 6], "altern": [3, 6], "although": 6, "alwai": 6, "amount": 3, "an": [3, 4, 5, 6], "analysi": 5, "ancestor": 6, "ani": [3, 4, 5, 6], "anoth": [3, 6], "anotherdir": 3, "anyth": 6, "anywher": 3, "api": 1, "app": [1, 2, 3, 6], "appli": [3, 4, 5, 6], "applic": 6, "approach": 6, "approxim": [3, 4], "ar": [3, 4, 5, 6], "archiv": 5, "archive_filenam": 3, "arg": [3, 4, 5, 6], "argmax": 3, "argmin_preval": [1, 2, 3], "args_i": 3, "argument": [3, 5, 6], "around": [5, 6], "arrai": [3, 4, 5, 6], "artifici": 3, "artificialprevalenceprotocol": [1, 2, 3], "as_binary_preval": [1, 2, 3], "asarrai": 3, "asdataload": [1, 3, 4], "assert": 5, "assign": [3, 5], "associ": 3, "assum": 6, "assumpion": 6, "attach": 5, "attempt": 6, "attribut": 6, "author": 6, "auto": 3, "automat": 4, "av": 6, "avail": [4, 5, 6], "averag": [3, 5, 6], "avoid": 3, "ax": 6, "axi": 3, "b": [3, 5, 6], "backend": [3, 6], "balanc": 6, "band": 3, "bandwidth": 6, "bandwidth_method": [1, 3, 6], "bar": 3, "barranquero": [4, 6], "base": [1, 2, 3, 4], "base_method": [], "base_quantifi": 6, "base_quantifier_class": 6, "baseestim": [4, 6], "basequantifi": [1, 3, 6], "basic": [5, 6], "batch": 4, "batch_siz": 4, "batch_size_test": 4, "bct": [4, 6], "bctscalibr": [1, 3, 4], "bean": 5, "beat": [4, 6], "been": [3, 4, 5, 6], "befor": [3, 4, 5, 6], "beforehand": 3, "begin": 3, "behaviour": 3, "being": [3, 6], "belong": 6, "below": [3, 5], "benchmark": [], "best": [3, 4, 6], "best_epoch": 3, "best_model": [1, 2, 3], "best_scor": 3, "better": 6, "between": [3, 4, 6], "bia": [3, 4, 6], "bidirect": 6, "bin": [3, 6], "binar": [1, 3, 5], "binari": [1, 3, 4, 5, 6], "binary_bias_bin": [1, 2, 3], "binary_bias_glob": [1, 2, 3], "binary_diagon": [1, 2, 3], "binary_quantifi": 6, "binaryaggregativequantifi": [1, 3, 6], "binaryquantifi": [1, 3, 6], "binom": 3, "block": 3, "boldsymbol": 6, "bool": [3, 6], "boolean": [3, 5, 6], "both": 5, "bound": [3, 6], "box": 3, "brief": 5, "bring": 6, "broken": 3, "brokenbar_supremacy_by_drift": [1, 2, 3], "buns": 6, "bypass": 6, 
"c": [3, 4, 5, 6], "calcul": 3, "calibr": [1, 2, 3], "calibratorfactori": 4, "call": [3, 5, 6], "callabl": [3, 5, 6], "can": [3, 5, 6], "cannot": 6, "care": 6, "carlo": 6, "carri": [3, 5, 6], "case": [3, 4, 5, 6], "castano": 5, "castro": 6, "categor": 5, "categori": 3, "cauchi": 6, "cc": [1, 3, 6], "cdf": 6, "cell": 6, "center": 6, "central": 3, "ch": 6, "chang": 5, "channel": 6, "characteriz": 5, "charg": [3, 5], "chart": 3, "check": 3, "check_dataframe_format": [], "check_file_format": [], "check_prevalence_vector": [1, 2, 3], "checkpoint": [4, 6], "checkpointdir": 6, "checkpointnam": 6, "checkpointpath": 4, "child": 3, "choic": 6, "choos": 6, "chosen": 3, "class": [3, 4, 5, 6], "class2int": 5, "class_weight": 6, "classes_": [1, 3, 4, 5, 6], "classif": [1, 2, 3, 5, 6], "classif_posterior": 6, "classif_predict": 6, "classifi": [1, 3, 4, 6], "classification_fit": 6, "classifier_fit_predict": [1, 3, 6], "classifier_net": 4, "classifiermixin": 4, "classifyandcount": [1, 3, 6], "classmethod": [3, 5, 6], "classnam": 5, "classs": 3, "clean_checkpoint": [1, 3, 6], "clean_checkpoint_dir": [1, 3, 6], "clef": 5, "clip": 3, "clip_if_necessari": 3, "close": [3, 5, 6], "closest": 6, "cm": 3, "cnn": 6, "cnnnet": [1, 3, 4, 6], "code": 4, "codefram": 5, "codifi": 5, "col": 5, "collat": [1, 2, 3], "collect": [3, 4, 5, 6], "collet": 5, "color": 3, "colormap": 3, "column": [3, 5], "com": [1, 3], "combin": [3, 6], "combinations_budget": 3, "come": [3, 5, 6], "common": 6, "compar": 3, "compat": 6, "competit": 5, "complement": 6, "complet": [3, 6], "compon": [3, 4, 6], "comput": [3, 6], "conceptu": 6, "condit": [1, 3, 6], "conduct": 3, "confer": [4, 5], "confid": 3, "configstatu": [1, 2, 3], "configur": [3, 6], "conform": 5, "connect": 6, "consecut": [3, 4, 5, 6], "consid": [3, 4, 5, 6], "consist": [3, 4, 5, 6], "constrain": [3, 5], "construct": 6, "consult": 6, "contain": [1, 3, 4, 5, 6], "contanin": 3, "content": 2, "context": 3, "control": 3, "conveni": 3, "converg": 6, "convert": [3, 4, 5, 6], "convolut": 4, "copi": [3, 5], "correct": [4, 6], "correctli": 3, "correspond": [5, 6], "count": [1, 3, 5, 6], "counter": 5, "countvector": 5, "covari": [5, 6], "cover": [3, 4], "coz": 5, "cpu": [4, 6], "creat": [3, 6], "create_if_not_exist": [1, 2, 3], "create_parent_dir": [1, 2, 3], "crip": 6, "crisp": [3, 6], "criteria": 5, "criterion": 6, "cross": [3, 4, 5, 6], "cross_val_predict": [1, 2, 3], "csr": 5, "csr_matrix": 5, "csv": 5, "cuda": [4, 6], "cumberson": 3, "current": [3, 4, 5, 6], "custom": [3, 5], "d": 6, "d_": 3, "dat": 4, "data": [1, 2, 3, 4, 6], "data_hom": 5, "datafram": 3, "dataload": 4, "datapoint": 6, "dataset": [1, 2, 3, 4, 6], "dataset_nam": 5, "deactiv": 3, "decaesteck": 6, "decai": 4, "decid": [3, 5], "decis": [3, 4, 6], "decision_funct": [1, 3, 4, 6], "decomposit": 4, "dedic": 5, "deep": [3, 6], "def": 3, "default": [3, 4, 5, 6], "defin": [3, 4, 5, 6], "degener": 6, "del": 5, "delai": 3, "delet": 4, "deliv": 6, "delta": 6, "denomin": 6, "dens": 6, "densiti": [3, 6], "depend": [3, 6], "describ": [3, 6], "descript": 5, "desir": [3, 5], "destin": 3, "detail": [4, 5, 6], "determin": 3, "determinist": 3, "devel": 5, "deviat": [3, 5], "devic": [1, 3, 4, 6], "df": [], "di": 6, "diagon": 3, "dict": [3, 5, 6], "dictionari": [3, 4, 5, 6], "differ": [3, 5, 6], "dij": 6, "dimens": [1, 3, 4, 5, 6], "dimension": [3, 4, 5, 6], "dir": [3, 6], "directori": [3, 4, 5, 6], "discard": [1, 3, 6], "discret": [3, 6], "disjoint": 4, "disk": [3, 5], "disntac": 6, "displai": [3, 4], "displaystyl": 3, "dissimilar": 6, 
"distanc": [3, 6], "distant": 3, "distribut": [3, 5, 6], "distributionmatch": [], "distributionmatchingi": [1, 3, 6], "distributionmatchingx": [1, 3, 6], "diverg": [3, 6], "divid": 3, "dl": 6, "dmx": [1, 3, 6], "dmy": [1, 3, 6], "do": [3, 4, 5, 6], "doc_embed": 6, "doc_embedding_s": 6, "doc_posterior": 6, "document": [4, 5, 6], "document_embed": [1, 3, 4], "doe": [3, 6], "doi": 6, "domain": 3, "domaina": 3, "domainb": 3, "domainmix": [1, 2, 3], "dot": 3, "dowload": 3, "down": [3, 5, 6], "download": [3, 5], "download_fil": [1, 2, 3], "download_file_if_not_exist": [1, 2, 3], "draw": 3, "drawn": [3, 5], "drop": 4, "drop_p": 4, "dropout": [4, 6], "dry": 5, "dtype": 5, "dump": 5, "dure": 6, "dx": 6, "dy": [1, 3, 6], "dynam": [4, 5, 6], "e": [3, 4, 5, 6], "eacc": [1, 3, 6], "each": [3, 4, 5, 6], "earli": [3, 4, 6], "earlystop": [1, 2, 3], "easili": 4, "ecc": [1, 3, 6], "ecml": 6, "edu": 5, "eemq": [1, 3, 6], "ehdi": [1, 3, 6], "either": [3, 6], "element": [3, 5, 6], "elm": 6, "em": [1, 3, 6], "emb": 4, "embed": [4, 6], "embed_s": 4, "embedding_s": 4, "emploi": 6, "empti": 5, "emq": [1, 3, 6], "emq_bct": [1, 3, 6], "enabl": 4, "encod": 5, "end": [3, 6], "end_msg": 3, "endeavour": 3, "endow": 3, "enforc": 3, "ensembl": [1, 3, 5, 6], "ensemblefactori": [1, 3, 6], "ensur": 3, "entir": 3, "entri": 6, "enum": 3, "enumer": 3, "environ": [3, 6], "ep": 3, "epacc": [1, 3, 6], "epoch": [3, 4, 6], "epsilon": [1, 3, 6], "equal": [3, 6], "equat": 6, "equidist": 3, "equival": 6, "err_nam": 3, "error": [1, 2, 4, 6], "error_by_drift": [1, 2, 3], "error_metr": 3, "error_nam": 3, "especi": 3, "establish": 3, "estim": [3, 4, 5, 6], "estim_prev": 3, "estim_preval": 6, "estimant": 6, "esuli": [4, 5, 6], "et": [4, 5, 6], "evaltestcas": [], "evalu": [1, 2, 4, 5, 6], "evaluate_on_sampl": [1, 2, 3], "evaluation_report": [1, 2, 3], "evalut": 6, "even": 3, "eventu": [4, 5], "everi": [3, 6], "exact": [5, 6], "exact_train_prev": 6, "exampl": [3, 4, 5, 6], "exce": 3, "except": [3, 6], "exhaust": 3, "exist": [3, 6], "exist_ok": 3, "expand": 3, "expand_grid": [1, 2, 3], "expect": 6, "expectationmaximizationquantifi": [1, 3, 6], "experi": [3, 5], "explicit": 6, "explor": 3, "express": 5, "extend": [3, 6], "extens": 6, "extract": [3, 5, 6], "f": [5, 6], "f1": [3, 4], "f1_error": [1, 2, 3], "f1e": [1, 2, 3], "f_1": 3, "factor": 3, "factori": 6, "fail": [1, 2, 3, 6], "fals": [3, 4, 5, 6], "famili": 6, "far": [3, 4, 5], "fare": 3, "fast": [3, 5], "faster": 5, "feat1": 5, "feat2": 5, "featn": 5, "featur": [5, 6], "feature_extract": 5, "fetch_ifcb": [1, 3, 5], "fetch_lequa2022": [1, 3, 5], "fetch_review": [1, 3, 5, 6], "fetch_twitt": [1, 3, 5], "fetch_ucibinarydataset": [1, 3, 5], "fetch_ucibinarylabelledcollect": [1, 3, 5], "fetch_ucilabelledcollect": 5, "fetch_ucimulticlassdataset": [1, 3, 5], "fetch_ucimulticlasslabelledcollect": [1, 3, 5], "ff": 6, "ff_layer": 6, "file": [3, 4, 5, 6], "filenam": 3, "final": 6, "find": 6, "first": [3, 5, 6], "fit": [1, 2, 3, 4, 5, 6], "fit_classifi": 6, "fit_cv": [1, 3, 4], "fit_tr_val": [1, 3, 4], "fit_transform": [1, 3, 5], "flag": 3, "float": [3, 4, 5, 6], "fn": 3, "fold": [3, 5, 6], "folder": [5, 6], "follow": [3, 5, 6], "fomart": 5, "for_model_select": 5, "forc": 3, "form": [3, 5, 6], "forman": 6, "format": [3, 5, 6], "former": 6, "forward": [1, 3, 4, 6], "found": [3, 4, 5, 6], "fp": 3, "fpr": [3, 6], "frac": [3, 6], "fraction": 6, "framework": [1, 6], "franc": 6, "frequenc": [5, 6], "from": [3, 4, 5, 6], "from_csv": [1, 3, 5], "from_nam": [1, 2, 3], "from_spars": [1, 3, 5], "from_text": [1, 
3, 5], "full": [3, 5], "fulli": 3, "func": 3, "function": [1, 2, 4, 5, 6], "further": [4, 5, 6], "fusion": 5, "g": [3, 5, 6], "gain": 3, "gao": [5, 6], "gap": 5, "gasp": 5, "gen": 3, "gener": [3, 4, 5, 6], "generation_func": 3, "get": [3, 4, 5, 6], "get_col": [1, 2, 3], "get_diverg": [1, 2, 3], "get_kde_funct": [1, 3, 6], "get_labelled_collect": [1, 2, 3], "get_mixture_compon": [1, 3, 6], "get_nprevpoints_approxim": [1, 2, 3], "get_param": [1, 2, 3, 4, 6], "get_probability_distribut": [1, 3, 6], "get_quapy_hom": [1, 2, 3], "getptecondestim": [1, 3, 6], "github": [], "give": 6, "given": [3, 4, 5, 6], "global": 3, "goal": 6, "goe": 3, "gonz\u00e1lez": [5, 6], "got": 6, "gpu": [4, 6], "gram_matrix_mix_sum": [1, 3, 6], "greater": 5, "grenobl": 6, "grid": [3, 6], "gridsearchcv": 6, "gridsearchq": [1, 2, 3, 6], "ground": 6, "ground_truth_path": [], "group_param": [1, 2, 3], "guarante": [3, 5, 6], "guid": 5, "g\u00e1llego": [5, 6], "h": 6, "ha": [3, 4, 5, 6], "had": 5, "handl": 3, "hard": [3, 4, 6], "harmon": 3, "hat": [3, 6], "have": [3, 5, 6], "hcr": 5, "hd": [3, 6], "hdx": [1, 3, 6], "hdy": [1, 3, 6], "held": [3, 4, 6], "helling": 6, "hellingerdist": [1, 2, 3], "hellingerdistancei": [1, 3, 6], "hellingh": 3, "help": 6, "helper": 3, "henc": [3, 5, 6], "here": 6, "heurist": 6, "hidden": [4, 6], "hidden_s": 4, "hierarchytestcas": [], "high": 3, "highlight": 3, "hightlight": 3, "histogram": 6, "hlt": 1, "hold": [3, 6], "home": [3, 5], "hook": 6, "host": 1, "host_fold": 4, "how": [3, 5, 6], "howev": 3, "hp": 5, "hsosik": [], "html": 5, "http": [1, 3, 5, 6], "hyper": [3, 4, 6], "hyperparamet": [3, 6], "i": [1, 3, 4, 5, 6], "ic": 5, "id": 5, "ifcb": 5, "ifcb_zenodo": [], "ifcbtestsampl": 5, "ifcbtrainsamplesfromdir": 5, "ignor": [3, 5, 6], "ii": 3, "iid": 6, "imdb": 5, "implement": [3, 4, 5, 6], "implicit": 3, "import": [5, 6], "impos": 3, "improv": [3, 4, 6], "in_plac": 3, "includ": [1, 5, 6], "inconveni": 3, "independ": [3, 6], "index": [1, 3, 4, 5, 6], "indextransform": [1, 3, 5], "indic": [3, 4, 5, 6], "individu": 5, "ineffici": 3, "infer": 5, "inform": [3, 4, 5, 6], "infrequ": 5, "inherit": 3, "initi": [4, 6], "inplac": [5, 6], "input": [3, 4, 5, 6], "instal": [4, 6], "instanc": [3, 4, 5, 6], "instanti": [3, 4, 6], "instead": [3, 5, 6], "int": [3, 5, 6], "integ": [3, 4, 5, 6], "interest": 3, "interfac": 6, "intern": [4, 5, 6], "interpret": 6, "interv": 3, "intract": 3, "invalid": [1, 2, 3], "invok": [3, 5], "involv": 3, "irrespect": 6, "isomer": 3, "isometr": 3, "issu": [3, 6], "isti": 1, "item": 3, "iter": [3, 5, 6], "iterateprotocol": [1, 2, 3], "iterrow": [], "its": [3, 4, 6], "itself": [3, 6], "j": [5, 6], "joachim": [4, 6], "joblib": 6, "join": [1, 3, 5], "k": [3, 4, 5, 6], "kde": 6, "kdebas": [1, 3, 6], "kdei": 6, "kdeyc": [1, 3, 6], "kdeyhd": [1, 3, 6], "kdeyml": [1, 3, 6], "keep": [3, 5], "kei": [3, 5], "kept": 5, "kernel": [4, 6], "kernel_height": 4, "kerneldens": 6, "keyword": [5, 6], "kfcv": [1, 3, 4, 5, 6], "kindl": [5, 6], "kl": 3, "kld": [1, 2, 3, 4, 6], "knowledg": 5, "known": [3, 6], "kraemer": 3, "kullback": [3, 6], "kundaj": 4, "kwarg": [4, 5, 6], "l": 6, "l1": [3, 6], "l2": 6, "label": [3, 4, 5, 6], "labelcollectiontestcas": [], "labelled_collect": 3, "labelledcollect": [1, 3, 5, 6], "larg": 3, "larger": [3, 5, 6], "largest": 3, "last": [3, 4, 5], "latinn": 6, "latter": 6, "launch": 3, "layer": [4, 6], "lazi": 6, "ldot": 6, "lead": 5, "learn": [3, 4, 5, 6], "learner": [4, 6], "least": 5, "leav": 5, "left": [3, 5, 6], "legend": 3, "leibler": [3, 6], "len": 3, "length": [4, 5], 
"lequa": 5, "lequa2022_experi": 5, "less": [3, 5], "let": [3, 6], "leyend": 3, "like": [3, 4, 5, 6], "likelihood": [4, 6], "limit": [3, 6], "line": 3, "linear": [3, 6], "linear_model": 4, "linear_search": [1, 2, 3], "linearsvc": 5, "link": 5, "list": [3, 4, 5, 6], "listedcolormap": 3, "literatur": 3, "load": [1, 3, 5, 6], "load_category_map": [], "load_fn": [], "load_raw_docu": [], "load_vector_docu": [], "loader": 5, "loader_func": 5, "loader_kwarg": 5, "local": 3, "log": [3, 5, 6], "logist": [4, 6], "logisticregress": [4, 6], "logscal": 3, "logspac": 6, "loki": [3, 6], "long": 4, "longer": [3, 6], "longest": 4, "look": 6, "loop": 6, "loss": [3, 4, 6], "lost": 3, "low": [3, 4], "lower": 6, "lower_is_bett": 3, "lowranklogisticregress": [1, 3, 4], "lpha": [], "lq": 6, "lr": [4, 6], "lstm": [4, 6], "lstm_class_nlay": 4, "lstm_hidden_s": 6, "lstm_nlayer": 6, "lstmnet": [1, 3, 4], "m": [1, 3, 6], "machin": 4, "macro": 3, "made": [3, 5, 6], "mae": [1, 2, 3, 4, 6], "mae_loss": [1, 3, 6], "mai": [3, 6], "maintain": 6, "make": [3, 6], "makedir": 3, "manag": 5, "mani": [3, 6], "manner": [4, 6], "map": [4, 6], "map_parallel": [1, 2, 3], "margin": [4, 6], "mark": 3, "mass": 3, "match": 6, "math": [3, 6], "mathbb": 6, "mathcal": [3, 6], "mathrm": 6, "matplotlib": 3, "matric": 5, "matrix": [3, 6], "max": [1, 3, 6], "max_it": [1, 3, 6], "max_sample_s": 6, "maxim": 6, "maximum": [3, 4, 6], "maximumlikelihoodprevalenceestim": [1, 3, 6], "mean": [3, 4, 5, 6], "mean_absolute_error": [1, 2, 3], "mean_normalized_absolute_error": [1, 2, 3], "mean_normalized_relative_absolute_error": [1, 2, 3], "mean_relative_absolute_error": [1, 2, 3], "meant": 6, "measur": [3, 6], "median": 6, "medianestim": [1, 3, 6], "medianestimator2": [1, 3, 6], "medianquantifi": [], "member": 6, "memori": 4, "merchandis": 5, "messag": 6, "met": 5, "meta": [1, 2, 3], "method": [1, 2, 3], "method_nam": 3, "method_ord": 3, "methodnam": [], "metric": [3, 6], "might": [3, 5, 6], "mimick": 6, "min_": [3, 6], "min_df": [5, 6], "min_po": 6, "minim": [3, 6], "minimum": [5, 6], "minimun": 5, "mining6": 5, "minu": 3, "misclassif": 6, "misclassificatin": 6, "miss": 3, "mixtur": [3, 6], "mixture_point": 3, "mkld": [1, 2, 3, 6], "ml": [5, 6], "mlpe": 6, "mnae": [1, 2, 3, 6], "mnkld": [1, 2, 3, 6], "mnrae": [1, 2, 3, 6], "mock": [3, 4], "mock_labelled_collect": [], "model": [3, 4, 5, 6], "model_select": [1, 2, 6], "modif": 3, "modifi": 3, "modseltestcas": [], "modul": [0, 1, 2], "monitor": 3, "mont": 6, "montecarlo_tri": 6, "more": [3, 5, 6], "moreo": [5, 6], "most": [3, 5, 6], "mrae": [1, 2, 3, 4, 6], "ms2": [1, 3, 6], "mse": [1, 2, 3, 6], "msg": 3, "multi": 6, "multiclass": [3, 5, 6], "multipli": 3, "multiprocess": [3, 6], "multivari": 4, "must": [5, 6], "mutual": 6, "my_arrai": 3, "my_collect": 5, "mytestcas": [], "n": [3, 4, 6], "n_bin": [3, 6], "n_categori": [], "n_class": [1, 3, 4, 5, 6], "n_compon": 4, "n_dimens": [4, 6], "n_epoch": 6, "n_featur": [4, 6], "n_instanc": [3, 4, 6], "n_job": [3, 4, 5, 6], "n_preval": 3, "n_prevpoint": 3, "n_repeat": 3, "n_sampl": [3, 4], "n_test": 5, "n_train": 5, "nae": [1, 2, 3], "name": [3, 4, 5, 6], "natur": 3, "naturalprevalenceprotocol": [1, 2, 3], "nbin": [3, 6], "nbv": [4, 6], "nbvscalibr": [1, 3, 4], "ndarrai": [3, 5, 6], "necessari": [3, 6], "need": [3, 5, 6], "neg": [3, 6], "neg_label": [1, 3, 6], "nest": 6, "net": 4, "network": [3, 4, 5, 6], "neural": [1, 2, 3, 5, 6], "neuralclassifiertrain": [1, 3, 4, 6], "never": 3, "new": [3, 5], "newelm": [1, 3, 6], "newonevsal": [1, 3, 6], "newsvma": [1, 3, 6], 
"newsvmkld": [1, 3, 6], "newsvmq": [1, 3, 6], "newsvmra": [1, 3, 6], "next": [3, 4, 5], "nfeat": 6, "nfold": [3, 5], "nkld": [1, 2, 3, 4, 6], "nn": 4, "nogap": 5, "non": 6, "non_aggreg": [1, 2, 3], "non_aggregative_method": [], "none": [3, 4, 5, 6], "norm": 6, "normal": [3, 5, 6], "normalize_preval": [1, 2, 3], "normalized_absolute_error": [1, 2, 3], "normalized_relative_absolute_error": [1, 2, 3], "note": [3, 5], "noth": 6, "novemb": 4, "np": [3, 5, 6], "npp": [1, 2, 3], "nrae": [1, 2, 3], "nrepeat": 5, "num_prevalence_combin": [1, 2, 3], "number": [3, 4, 5, 6], "numer": [3, 5, 6], "numpi": [3, 4], "o": 3, "object": [3, 4, 5, 6], "observ": 6, "obtain": [3, 4, 6], "obtaind": 3, "obvious": 3, "occur": 5, "occurr": 5, "off": [4, 6], "offici": 5, "oldsymbol": [], "omd": 5, "on_preclassified_inst": [1, 2, 3], "onc": [3, 5], "one": [3, 5, 6], "ones": [3, 5, 6], "onevsal": [1, 3, 6], "onevsallaggreg": [1, 3, 6], "onevsallgener": [1, 3, 6], "onlabelledcollectionprotocol": [1, 2, 3], "onli": [3, 4, 5, 6], "open": [1, 3, 5], "optim": [3, 4, 6], "optim_minim": [1, 2, 3, 6], "option": [5, 6], "order": [3, 5, 6], "order_bi": 6, "org": [5, 6], "orient": [3, 6], "origin": [3, 5, 6], "other": [3, 5, 6], "otherwis": [3, 5, 6], "out": [3, 4, 5, 6], "outer": 3, "outlier": 3, "output": [3, 4, 5, 6], "outsid": 6, "over": 3, "overal": 3, "overridden": 6, "overview": 5, "p": [1, 3, 4, 5, 6], "p_": 6, "p_i": 3, "p_x": 6, "pacc": [1, 3, 6], "packag": [1, 2], "pad": [4, 5], "pad_length": 4, "padding_length": 4, "page": 1, "pair": 3, "panda": 3, "paper": [4, 6], "parallel": [1, 2, 3, 4, 5, 6], "parallel_backend": 6, "param": [3, 4, 6], "param_grid": [3, 6], "param_mod_sel": 6, "param_model_sel": 6, "paramet": [3, 4, 5, 6], "parameter": 6, "parent": 3, "parllel": 6, "part": 5, "particular": 6, "partit": [3, 4], "pass": [3, 4, 6], "patch": [4, 6], "path": [3, 4, 5, 6], "path_dir": [], "patienc": [3, 4, 6], "pcc": [1, 3, 6], "pdf": [1, 3, 6], "percentil": 3, "perf": [4, 6], "perform": [3, 4, 6], "perman": 3, "pglez82": [], "phase": 6, "pickl": [3, 5, 6], "pickle_path": 3, "pickled_resourc": [1, 2, 3], "pip": 1, "pipelin": 6, "pkdd": 6, "pkl": 3, "place": [3, 5], "plankton": 5, "pleas": 5, "plot": [1, 2], "pmlr": 4, "point": [3, 5], "polici": 6, "pos_class": [3, 5], "pos_label": [1, 3, 6], "posit": [3, 5, 6], "positive_preval": 3, "possibl": [3, 6], "post": 3, "posterior": [3, 4, 6], "posterior_prob": 6, "power": 3, "pp": 4, "pre": 3, "pre_classif": 3, "prec": 3, "preced": 5, "precis": 3, "precomput": 5, "predefin": 5, "predict": [1, 2, 3, 4, 6], "predict_on": 6, "predict_proba": [1, 3, 4, 6], "prefer": 3, "prefix": [], "preliminari": 6, "prepar": 3, "preprocess": [1, 2, 3, 6], "present": 5, "preserv": [3, 5], "prev": [3, 5], "preval": [1, 3, 4, 5, 6], "prevalence_estim": 3, "prevalence_from_label": [1, 2, 3], "prevalence_from_prob": [1, 2, 3], "prevalence_grid": [1, 2, 3], "prevalence_linspac": [1, 2, 3], "prevalence_valu": [], "prevel": 6, "previous": [3, 6], "prevs_estim": 6, "prevs_hat": 3, "princip": 4, "print": [3, 4, 5], "prior": [3, 6], "probabilist": [1, 3, 4, 6], "probabilisticadjustedclassifyandcount": [1, 3, 6], "probabilisticclassifyandcount": [1, 3, 6], "probabl": [3, 4, 6], "problem": [3, 5, 6], "procedur": 3, "proceed": [5, 6], "process": [3, 5], "produc": 3, "product": 5, "progress": 5, "proper": 6, "properli": 6, "properti": [3, 4, 5, 6], "proport": [4, 5, 6], "propos": 6, "protocol": [1, 2, 5, 6], "provid": [5, 6], "ptecondestim": 6, "ptr": 6, "publicli": 5, "purpos": 6, "put": 6, "py": 5, "python": 
[1, 5], "pytorch": 6, "p\u00e9rez": [5, 6], "q": [3, 4, 6], "q_": 6, "q_i": 3, "qacc": 4, "qdrop_p": 6, "qf1": 4, "qgm": 4, "qp": [3, 5, 6], "quanet": [4, 6], "quanetmodul": [1, 3, 6], "quanettrain": [1, 3, 6], "quantif": [0, 1, 3, 4, 5, 6], "quantifi": [1, 2, 3, 5, 6], "quantification_error": 3, "quantiti": 3, "quapy_data": 3, "quay_data": 5, "que": 3, "question": 3, "quevedo": 5, "quick": 5, "quit": 3, "r": [3, 5, 6], "rae": [1, 2, 3], "rais": [3, 6], "raise_error": 3, "raise_except": 3, "rand": 3, "random": [3, 5], "random_se": 3, "random_st": [1, 2, 3, 5, 6], "randomli": 5, "rang": [3, 6], "rank": [4, 6], "rare": 5, "rate": [3, 4, 6], "raw": [3, 5], "re": [4, 5], "reach": 6, "read": 5, "reader": [1, 2, 3], "real": [3, 4, 5, 6], "realiz": 3, "reason": 6, "recal": 3, "recalib": 6, "recalibr": 6, "recalibratedprobabilisticclassifi": [1, 3, 4], "recalibratedprobabilisticclassifierbas": [1, 3, 4], "recip": 6, "recommend": 6, "recomput": 6, "record": [], "recurr": 5, "recurs": 6, "red_siz": 6, "reduc": [1, 3, 5], "reduce_column": [1, 3, 5], "refer": [4, 5], "refit": 3, "regardless": 5, "regim": 3, "region": 3, "regist": 6, "regress": 4, "reindex_label": [1, 3, 5], "reiniti": 4, "rel": [3, 5, 6], "relative_absolute_error": [1, 2, 3], "reli": [3, 6], "remaind": 6, "remov": [5, 6], "repeat": 3, "repetit": 3, "replac": [3, 5], "replic": 3, "repo": 5, "report": [3, 6], "repositori": 5, "repr_siz": 4, "repres": [3, 5, 6], "represent": [3, 4, 6], "reproduc": [3, 5], "request": [3, 5, 6], "requir": [4, 5, 6], "reset_net_param": [1, 3, 4], "resourc": 3, "resp": 6, "respect": [3, 6], "rest": [3, 4, 5], "result": [3, 6], "resultsubmiss": [], "retain": [4, 6], "retrain": 4, "return": [3, 4, 5, 6], "return_constrained_dim": 3, "return_typ": [1, 2, 3], "reus": [3, 5], "review": 5, "reviews_sentiment_dataset": 5, "rgmin_": [], "right": [3, 5, 6], "rodr\u00edguez": 6, "round": 5, "routin": [3, 5, 6], "row": 5, "run": [3, 5, 6], "runtest": [], "saeren": 6, "same": [3, 5, 6], "sampl": [1, 2, 3, 4, 5, 6], "sample_id": [], "sample_prev": 3, "sample_s": [3, 6], "samples_paramet": [1, 2, 3], "samples_to_str": [], "samplesfromdir": 5, "sampling_from_index": [1, 3, 5], "sampling_index": [1, 3, 5], "sander": 5, "sanity_check": 3, "save": [3, 5], "save_text_fil": [1, 2, 3], "savepath": 3, "scale": [3, 4, 6], "scall": 5, "schwarz": 6, "scikit": [3, 4, 5, 6], "scipi": [3, 5], "score": [3, 4, 5, 6], "scott": 6, "script": [5, 6], "se": [1, 2, 3], "search": [1, 3, 6], "sebastiani": [5, 6], "second": [3, 5], "secondari": 3, "see": [3, 4, 5, 6], "seed": [3, 5, 6], "seen": [3, 6], "select": [3, 5, 6], "self": [3, 4, 5, 6], "semeval13": 5, "semeval14": 5, "semeval15": 5, "semeval16": 5, "sens": 3, "sentenc": 5, "sentiment": 5, "separ": [3, 5], "sequenc": 3, "seri": 5, "serv": [3, 5], "set": [3, 4, 5, 6], "set_param": [1, 2, 3, 4, 6], "setup": 6, "sever": 5, "shape": [3, 4, 5, 6], "share": 5, "shift": [3, 4, 6], "short": 4, "shorter": 4, "should": [3, 4, 5, 6], "show": [3, 4, 5, 6], "show_dens": 3, "show_legend": 3, "show_std": 3, "shown": 3, "shrikumar": 4, "shuffl": [4, 5], "side": 3, "sign": 3, "significantli": 3, "silent": [3, 6], "silverman": 6, "sim_": 6, "similar": [3, 6], "simpl": [3, 6], "simplex": 3, "simpli": [3, 6], "simplif": 6, "sinc": [5, 6], "singl": [3, 6], "single_sample_train": 5, "site": [], "size": [3, 4, 5, 6], "skip": 3, "sklearn": [4, 5, 6], "sld": [1, 3, 6], "sleep": 3, "slice": 3, "slsqp": 3, "smallest": 3, "smm": [1, 3, 6], "smooth": [1, 2, 3], "smooth_limits_epsilon": 3, "so": [3, 4, 5, 6], 
"social": 5, "soft": 6, "solut": [3, 6], "solv": 6, "solve_adjust": [1, 3, 6], "solver": 6, "some": [3, 5, 6], "some_arrai": 3, "sort": 6, "sought": 6, "sourc": [1, 3, 4, 5, 6], "space": [3, 4, 6], "spars": 5, "special": 5, "specif": [3, 6], "specifi": [3, 4, 5, 6], "spectrum": 3, "speed": [3, 6], "sperduti": 5, "split": [3, 4, 5, 6], "split_random": [1, 3, 5], "split_stratifi": [1, 3, 5], "splitstratifi": [1, 3, 5], "spmatrix": 5, "sqrt": [3, 6], "squar": [3, 6], "sst": 5, "stabil": 6, "stabl": 5, "stackexchang": 3, "stand": [3, 6], "standard": [1, 3, 4, 5, 6], "star": 3, "start_msg": 3, "stat": [1, 3, 5], "state": 3, "static": 6, "statist": [3, 6], "stats_siz": 6, "statu": [1, 2, 3], "std": 4, "stdout": 3, "step": [3, 6], "stop": [3, 4, 6], "store": [4, 5, 6], "str": [3, 5, 6], "stratif": 5, "stratifi": [4, 5, 6], "stride": 4, "string": [3, 5, 6], "strprev": [1, 2, 3], "structur": 6, "studi": 5, "style": 5, "subclass": [5, 6], "subdir": 3, "sublinear_tf": 5, "submodul": [1, 2], "subobject": 6, "subpackag": [1, 2], "subsequ": 5, "subtract": [3, 5], "subtyp": 5, "success": [1, 2, 3], "suit": 5, "sum": [3, 6], "sum_": [3, 6], "support": [5, 6], "surfac": 5, "svm": [4, 5, 6], "svm_perf_classifi": 4, "svm_perf_learn": 4, "svmperf": [1, 2, 3, 6], "svmperf_bas": [4, 6], "svmperf_hom": 6, "sweep": 6, "system": 6, "t": [3, 4, 6], "t1a": 5, "t1b": 5, "t2a": 5, "t2b": 5, "t50": [1, 3, 6], "tab10": 3, "tail": 3, "tail_density_threshold": 3, "take": [3, 5, 6], "taken": [3, 4, 5, 6], "target": [3, 4, 6], "task": 5, "te": 5, "temp": 6, "temp_se": [1, 2, 3], "temperatur": [4, 6], "tempor": [3, 4], "tensor": 4, "term": [3, 4, 5, 6], "ternari": 6, "test": [3, 4, 5, 6], "test_aggreg": [], "test_aggregative_method": [], "test_app_not_repl": [], "test_app_numb": [], "test_app_repl": [], "test_app_sanity_check": [], "test_bas": [], "test_binari": [], "test_covariate_shift_not_repl": [], "test_covariate_shift_repl": [], "test_dataset": [], "test_ensemble_method": [], "test_eval_speedup": [], "test_evalu": [], "test_evaluation_output": [], "test_fetch_lequa2022": [], "test_fetch_review": [], "test_fetch_twitt": [], "test_fetch_ucidataset": [], "test_fetch_ucimultidataset": [], "test_gen": 5, "test_hierarchi": [], "test_import": [], "test_join": [], "test_kraemer_not_repl": [], "test_kraemer_repl": [], "test_labelcollect": [], "test_median_meta": [], "test_median_meta_modsel": [], "test_method": [], "test_modsel": [], "test_modsel_parallel": [], "test_modsel_parallel_speedup": [], "test_modsel_timeout": [], "test_no_seed_init": [], "test_non_aggregative_method": [], "test_npp_not_repl": [], "test_npp_repl": [], "test_parallel_replic": [], "test_path": 5, "test_prediction_replic": [], "test_prevalences_path": [], "test_probabilist": [], "test_protocol": [], "test_quanet_method": [], "test_replic": [], "test_samping_replic": [], "test_split": 5, "test_str_label_nam": [], "testcas": [], "testprotocol": [], "text": [3, 4, 5, 6], "text2tfidf": [1, 3, 5], "textclassifiernet": [1, 3, 4], "textual": 5, "tf": 5, "tfidf": 5, "tfidfvector": 5, "th": 6, "than": [3, 4, 5, 6], "thei": 6, "them": [5, 6], "therefor": 3, "thi": [1, 3, 4, 5, 6], "thorsten": 4, "those": [3, 4, 6], "though": 3, "threshold": [3, 6], "thresholdoptim": [1, 3, 6], "through": 3, "thu": [3, 4, 6], "time": [3, 5, 6], "timeout": [1, 2, 3], "timeouterror": 3, "timer": 3, "titl": 3, "tmp": 4, "tn": 3, "togeth": 5, "token": [4, 5], "tol": 6, "toler": 6, "toleranz": 3, "top": [3, 6], "topso": [3, 6], "topsoedist": [1, 2, 3], "torch": [4, 6], "torchdataset": 
[1, 3, 4], "total": [1, 2, 3], "toward": [5, 6], "tp": 3, "tpr": [3, 6], "tr": 5, "tr_iter_per_poch": 6, "tr_prev": [3, 6], "track": 3, "trade": [4, 6], "train": [3, 4, 5, 6], "train_path": 5, "train_prev": 3, "train_prop": 5, "train_siz": 5, "train_test": [1, 3, 5], "trainer": 4, "transform": [1, 3, 4, 5, 6], "tri": 6, "trial": 6, "true": [3, 4, 5, 6], "true_prev": 3, "truncatedsvd": 4, "truth": 6, "tscalibr": [1, 3, 4], "ttest_alpha": 3, "tupl": [3, 5, 6], "tweet": 5, "twitter": 5, "twitter_sentiment_datasets_test": 5, "twitter_sentiment_datasets_train": 5, "two": [3, 5, 6], "txt": 3, "type": [3, 5, 6], "typic": [3, 4, 5, 6], "u": 6, "u1": 5, "uci": 5, "uci_dataset": 5, "uci_multiclass_dataset": 5, "ucimlrepo": 5, "unalt": 4, "underli": 6, "underlin": 3, "understand": 3, "undertaken": 3, "unifi": 6, "uniform": [3, 5, 6], "uniform_prevalence_sampl": [1, 2, 3], "uniform_sampl": [1, 3, 5], "uniform_sampling_index": [1, 3, 5], "uniform_simplex_sampl": [1, 2, 3], "uniformli": 3, "uniformprevalenceprotocol": [1, 2, 3], "union": 5, "uniqu": 5, "unit": [3, 6], "unk": 5, "unknown": 5, "unlabel": 6, "unlik": 3, "until": 6, "unus": [3, 4], "up": [3, 4, 6], "updat": 6, "upp": [1, 2, 3], "url": 3, "us": [3, 4, 5, 6], "user": 3, "utf": 5, "util": [1, 2, 4, 5], "v": [3, 4, 6], "va_iter_per_poch": 6, "val": [4, 5], "val_gen": 5, "val_split": [1, 3, 4, 6], "val_split_": [1, 3, 6], "valid": [3, 4, 5, 6], "valid_loss": [1, 3, 4, 6], "valid_polici": [1, 3, 6], "valu": [3, 4, 5, 6], "valueerror": 3, "variabl": [3, 5], "variant": [3, 6], "vector": [3, 4, 5, 6], "verbos": [3, 4, 5, 6], "veri": 3, "version": [3, 4], "vertic": 3, "vertical_xtick": 3, "via": [3, 4, 6], "vline": 3, "vocab_s": 4, "vocabulari": [4, 5], "vocabulary_s": [1, 3, 4, 5, 6], "vscalibr": [1, 3, 4], "w": 5, "wa": [3, 5, 6], "wai": 6, "wait": 4, "warn": [1, 3, 5, 6], "wb": 5, "we": 5, "weight": [4, 5], "weight_decai": 4, "well": 6, "whcih": 5, "when": [3, 4, 5, 6], "whenev": 3, "where": [3, 4, 5, 6], "whether": [3, 4, 5, 6], "which": [3, 4, 5, 6], "while": [4, 5, 6], "whoi": 5, "whole": [3, 4], "whose": [5, 6], "widetild": 6, "within": [3, 6], "without": [3, 5], "word": [4, 5, 6], "work": [3, 5, 6], "worker": [3, 4, 5, 6], "workshop": 6, "wors": 3, "would": [5, 6], "wrap": 6, "wrap_cls_param": [], "wrapper": [3, 4, 5, 6], "x": [1, 3, 4, 5, 6], "x2": 5, "x_1": 6, "x_error": 3, "x_i": 6, "x_t": 6, "xavier": 4, "xavier_uniform": [1, 3, 4], "xp": [1, 3, 5], "xy": [1, 3, 5], "y": [1, 3, 4, 5, 6], "y_": 6, "y_error": 3, "y_i": 6, "y_j": 6, "y_pred": 3, "y_true": 3, "yeast": 5, "yield": [3, 5, 6], "z": 5, "z_": 3, "zenodo": 5, "zero": 3, "zip": 3}, "titles": ["quapy", "Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "titleterms": {"": 1, "A": [], "aggreg": 6, "api": [], "base": [5, 6], "benchmark": [], "calibr": 4, "classif": 4, "content": [1, 3, 4, 5, 6], "data": 5, "dataset": 5, "document": 1, "error": 3, "evalu": 3, "framework": [], "function": 3, "github": 1, "indic": 1, "instal": 1, "meta": 6, "method": [4, 6], "model_select": 3, "modul": [3, 4, 5, 6], "neural": 4, "non_aggreg": 6, "open": [], "packag": [3, 4, 5, 6], "plot": 3, "preprocess": 5, "protocol": 3, "python": [], "quantif": [], "quapi": [0, 1, 2, 3, 4, 5, 6], "reader": 5, "sourc": [], "submodul": [3, 4, 5, 6], "subpackag": 3, "svmperf": 4, "tabl": 1, "test": [], "test_bas": [], "test_dataset": [], "test_evalu": [], "test_hierarchi": [], "test_labelcollect": [], "test_method": [], 
"test_modsel": [], "test_protocol": [], "test_replic": [], "typic": [], "util": 3, "welcom": 1}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"i": [0, 2, 3, 4, 5], "python": [0, 4], "base": [0, 1, 2, 3], "open": [0, 2, 4], "sourc": [0, 2, 3, 4, 5], "framework": [0, 5], "quantif": [0, 2, 3, 4, 5], "thi": [0, 2, 3, 4, 5], "contain": [0, 2, 3, 4, 5], "api": 0, "modul": [0, 1], "includ": [0, 4, 5], "pip": [0, 5], "host": 0, "http": [0, 2, 4, 5], "com": [0, 2], "hlt": 0, "isti": 0, "packag": [0, 1], "subpackag": [0, 1], "classif": [0, 1, 2, 4, 5], "submodul": [0, 1], "calibr": [0, 1, 2], "bctscalibr": [0, 2, 3], "nbvscalibr": [0, 2, 3], "recalibratedprobabilisticclassifi": [0, 2, 3], "recalibratedprobabilisticclassifierbas": [0, 2, 3], "classes_": [0, 2, 3, 4, 5], "fit": [0, 1, 2, 3, 4, 5], "fit_cv": [0, 2, 3], "fit_tr_val": [0, 2, 3], "predict": [0, 1, 2, 3, 5], "predict_proba": [0, 2, 3, 5], "tscalibr": [0, 2, 3], "vscalibr": [0, 2, 3], "method": [0, 1, 2], "lowranklogisticregress": [0, 2, 3], "get_param": [0, 1, 2, 3, 5], "set_param": [0, 1, 2, 3, 5], "transform": [0, 2, 3, 4, 5], "neural": [0, 1, 2, 4, 5], "cnnnet": [0, 2, 3, 5], "document_embed": [0, 2, 3], "train": [0, 2, 3, 4, 5], "vocabulary_s": [0, 2, 3, 4, 5], "lstmnet": [0, 2, 3], "neuralclassifiertrain": [0, 2, 3, 5], "devic": [0, 2, 3, 5], "reset_net_param": [0, 2, 3], "textclassifiernet": [0, 2, 3], "dimens": [0, 2, 3, 4, 5], "forward": [0, 2, 3, 5], "xavier_uniform": [0, 2, 3], "torchdataset": [0, 2, 3], "asdataload": [0, 2, 3], "svmperf": [0, 1, 2, 5], "decision_funct": [0, 2, 3, 5], "valid_loss": [0, 2, 3, 5], "data": [0, 1, 2, 3, 5], "dataset": [0, 1, 2, 3, 5], "splitstratifi": [0, 2, 4], "binari": [0, 2, 3, 4, 5], "kfcv": [0, 2, 3, 4], "load": [0, 2, 4, 5], "n_class": [0, 2, 3, 4, 5], "reduc": [0, 2, 4], "stat": [0, 2, 4], "train_test": [0, 2, 4], "labelledcollect": [0, 2, 4, 5], "x": [0, 2, 3, 4, 5], "xp": [0, 2, 4], "xy": [0, 2, 4], "count": [0, 2, 4, 5], "join": [0, 2, 4], "p": [0, 2, 3, 4, 5], "preval": [0, 2, 3, 4, 5], "sampl": [0, 1, 2, 3, 4, 5], "sampling_from_index": [0, 2, 4], "sampling_index": [0, 2, 4], "split_random": [0, 2, 4], "split_stratifi": [0, 2, 4], "uniform_sampl": [0, 2, 4], "uniform_sampling_index": [0, 2, 4], "y": [0, 2, 3, 4, 5], "fetch_ifcb": [0, 2, 4], "fetch_ucibinarydataset": [0, 2, 4], "fetch_ucibinarylabelledcollect": [0, 2, 4], "fetch_ucimulticlassdataset": [0, 2, 4], "fetch_ucimulticlasslabelledcollect": [0, 2, 4], "fetch_lequa2022": [0, 2, 4], "fetch_review": [0, 2, 4, 5], "fetch_twitt": [0, 2, 4], "warn": [0, 2, 4, 5], "preprocess": [0, 1, 2, 5], "indextransform": [0, 2, 4], "add_word": [0, 2, 4], "fit_transform": [0, 2, 4], "index": [0, 2, 3, 4, 5], "reduce_column": [0, 2, 4], "standard": [0, 2, 3, 4, 5], "text2tfidf": [0, 2, 4], "reader": [0, 1, 2], "binar": [0, 2, 4], "from_csv": [0, 2, 4], "from_spars": [0, 2, 4], "from_text": [0, 2, 4], "reindex_label": [0, 2, 4], "aggreg": [0, 1, 2], "acc": [0, 1, 2, 5], "clip": [0, 2, 5], "solver": [0, 2, 5], "aggregation_fit": [0, 2, 5], "getptecondestim": [0, 2, 5], "newinvariantratioestim": [0, 2, 5], "adjustedclassifyandcount": [0, 2, 5], "aggregativecrispquantifi": 
[0, 2, 5], "aggregativemedianestim": [0, 2, 5], "quantifi": [0, 1, 2, 4, 5], "aggregativequantifi": [0, 2, 5], "classifi": [0, 2, 3, 5], "classifier_fit_predict": [0, 2, 5], "val_split": [0, 2, 3, 5], "val_split_": [0, 2, 5], "aggregativesoftquantifi": [0, 2, 5], "bayesiancc": [0, 2, 5], "get_conditional_probability_sampl": [0, 2, 5], "get_prevalence_sampl": [0, 2, 5], "sample_from_posterior": [0, 2, 5], "binaryaggregativequantifi": [0, 2, 5], "neg_label": [0, 2, 5], "pos_label": [0, 2, 5], "cc": [0, 2, 5], "classifyandcount": [0, 2, 5], "dmy": [0, 2, 5], "distributionmatchingi": [0, 2, 5], "dy": [0, 2, 5], "emq": [0, 2, 5], "em": [0, 2, 5], "emq_bct": [0, 2, 5], "epsilon": [0, 2, 5], "max_it": [0, 2, 5], "expectationmaximizationquantifi": [0, 2, 5], "hdy": [0, 2, 5], "hellingerdistancei": [0, 2, 5], "onevsallaggreg": [0, 2, 5], "pacc": [0, 2, 5], "pcc": [0, 2, 5], "probabilisticadjustedclassifyandcount": [0, 2, 5], "probabilisticclassifyandcount": [0, 2, 5], "sld": [0, 2, 5], "smm": [0, 2, 5], "newelm": [0, 2, 5], "newsvma": [0, 2, 5], "newsvmkld": [0, 2, 5], "newsvmq": [0, 2, 5], "newsvmra": [0, 2, 5], "kdebas": [0, 2, 5], "bandwidth_method": [0, 2, 5], "get_kde_funct": [0, 2, 5], "get_mixture_compon": [0, 2, 5], "pdf": [0, 2, 5], "kdeyc": [0, 2, 5], "gram_matrix_mix_sum": [0, 2, 5], "kdeyhd": [0, 2, 5], "kdeyml": [0, 2, 5], "quanetmodul": [0, 2, 5], "quanettrain": [0, 2, 5], "clean_checkpoint": [0, 2, 5], "clean_checkpoint_dir": [0, 2, 5], "mae_loss": [0, 2, 5], "max": [0, 2, 5], "condit": [0, 2, 5], "m": [0, 2, 5], "ms2": [0, 2, 5], "discard": [0, 2, 5], "t50": [0, 2, 5], "thresholdoptim": [0, 2, 5], "aggregate_with_threshold": [0, 2, 5], "basequantifi": [0, 2, 5], "binaryquantifi": [0, 2, 5], "onevsal": [0, 2, 5], "onevsallgener": [0, 2, 5], "newonevsal": [0, 2, 5], "meta": [0, 1, 2], "eacc": [0, 2, 5], "ecc": [0, 2, 5], "eemq": [0, 2, 5], "ehdi": [0, 2, 5], "epacc": [0, 2, 5], "ensembl": [0, 2, 4, 5], "valid_polici": [0, 2, 5], "probabilist": [0, 2, 3, 5], "medianestim": [0, 2, 5], "medianestimator2": [0, 2, 5], "ensemblefactori": [0, 2, 5], "get_probability_distribut": [0, 2, 5], "non_aggreg": [0, 1, 2], "dmx": [0, 2, 5], "hdx": [0, 2, 5], "distributionmatchingx": [0, 2, 5], "maximumlikelihoodprevalenceestim": [0, 2, 5], "readm": [0, 2, 5], "std_constrained_linear_l": [0, 2, 5], "error": [0, 1, 3, 5], "absolute_error": [0, 1, 2], "acc_error": [0, 1, 2], "ae": [0, 1, 2], "f1_error": [0, 1, 2], "f1e": [0, 1, 2], "from_nam": [0, 1, 2], "kld": [0, 1, 2, 3, 5], "mae": [0, 1, 2, 3, 5], "mean_absolute_error": [0, 1, 2], "mean_normalized_absolute_error": [0, 1, 2], "mean_normalized_relative_absolute_error": [0, 1, 2], "mean_relative_absolute_error": [0, 1, 2], "mkld": [0, 1, 2, 5], "mnae": [0, 1, 2, 5], "mnkld": [0, 1, 2, 5], "mnrae": [0, 1, 2, 5], "mrae": [0, 1, 2, 3, 5], "mse": [0, 1, 2, 5], "nae": [0, 1, 2], "nkld": [0, 1, 2, 3, 5], "normalized_absolute_error": [0, 1, 2], "normalized_relative_absolute_error": [0, 1, 2], "nrae": [0, 1, 2], "rae": [0, 1, 2], "relative_absolute_error": [0, 1, 2], "se": [0, 1, 2], "smooth": [0, 1, 2], "evalu": [0, 1, 3, 4, 5], "evaluate_on_sampl": [0, 1, 2], "evaluation_report": [0, 1, 2], "function": [0, 1, 3, 4, 5], "hellingerdist": [0, 1, 2], "topsoedist": [0, 1, 2], "adjusted_quantif": [0, 1, 2], "argmin_preval": [0, 1, 2], "as_binary_preval": [0, 1, 2], "check_prevalence_vector": [0, 1, 2], "clip_preval": [0, 1, 2], "counts_from_label": [0, 1, 2], "get_diverg": [0, 1, 2], "get_nprevpoints_approxim": [0, 1, 2], "linear_search": [0, 1, 2], 
"map_onto_probability_simplex": [0, 1, 2], "normalize_preval": [0, 1, 2], "num_prevalence_combin": [0, 1, 2], "optim_minim": [0, 1, 2, 5], "prevalence_from_label": [0, 1, 2], "prevalence_from_prob": [0, 1, 2], "prevalence_linspac": [0, 1, 2], "solve_adjust": [0, 1, 2], "strprev": [0, 1, 2], "uniform_prevalence_sampl": [0, 1, 2], "uniform_simplex_sampl": [0, 1, 2], "model_select": [0, 1, 5], "configstatu": [0, 1, 2], "fail": [0, 1, 2], "success": [0, 1, 2], "gridsearchq": [0, 1, 2, 5], "best_model": [0, 1, 2], "statu": [0, 1, 2], "invalid": [0, 1, 2], "timeout": [0, 1, 2], "cross_val_predict": [0, 1, 2], "expand_grid": [0, 1, 2], "group_param": [0, 1, 2], "plot": [0, 1], "binary_bias_bin": [0, 1, 2], "binary_bias_glob": [0, 1, 2], "binary_diagon": [0, 1, 2], "brokenbar_supremacy_by_drift": [0, 1, 2], "error_by_drift": [0, 1, 2], "protocol": [0, 1, 4, 5], "app": [0, 1, 2, 5], "prevalence_grid": [0, 1, 2], "samples_paramet": [0, 1, 2], "total": [0, 1, 2], "abstractprotocol": [0, 1, 2, 4], "abstractstochasticseededprotocol": [0, 1, 2], "collat": [0, 1, 2], "random_st": [0, 1, 2, 4, 5], "artificialprevalenceprotocol": [0, 1, 2], "domainmix": [0, 1, 2], "iterateprotocol": [0, 1, 2], "npp": [0, 1, 2], "naturalprevalenceprotocol": [0, 1, 2], "onlabelledcollectionprotocol": [0, 1, 2], "return_typ": [0, 1, 2], "get_col": [0, 1, 2], "get_labelled_collect": [0, 1, 2], "on_preclassified_inst": [0, 1, 2], "upp": [0, 1, 2], "uniformprevalenceprotocol": [0, 1, 2], "util": [0, 1, 3, 4], "earlystop": [0, 1, 2], "create_if_not_exist": [0, 1, 2], "create_parent_dir": [0, 1, 2], "download_fil": [0, 1, 2], "download_file_if_not_exist": [0, 1, 2], "get_quapy_hom": [0, 1, 2], "map_parallel": [0, 1, 2], "parallel": [0, 1, 2, 3, 4, 5], "parallel_unpack": [0, 1, 2], "pickled_resourc": [0, 1, 2], "save_text_fil": [0, 1, 2], "temp_se": [0, 1, 2], "search": [0, 2, 5], "page": 0, "content": 1, "implement": [2, 3, 4, 5], "measur": [2, 5], "us": [2, 3, 4, 5], "prev": [2, 4], "prevs_hat": 2, "comput": [2, 5], "absolut": [2, 5], "between": [2, 3, 5], "two": [2, 4, 5], "vector": [2, 3, 4, 5], "hat": [2, 5], "frac": [2, 5], "1": [2, 3, 4, 5], "mathcal": [2, 5], "sum_": [2, 5], "where": [2, 3, 4, 5], "ar": [2, 3, 4, 5], "class": [2, 3, 4, 5], "interest": 2, "paramet": [2, 3, 4, 5], "arrai": [2, 3, 4, 5], "like": [2, 3, 4, 5], "shape": [2, 3, 4, 5], "true": [2, 3, 4, 5], "valu": [2, 3, 4, 5], "return": [2, 3, 4, 5], "y_true": 2, "y_pred": 2, "term": [2, 3, 4, 5], "accuraci": [2, 5], "The": [2, 3, 4, 5], "tp": 2, "tn": 2, "fp": 2, "fn": 2, "stand": [2, 5], "posit": [2, 4, 5], "fals": [2, 3, 4, 5], "neg": [2, 5], "respect": [2, 5], "label": [2, 3, 4, 5], "f1": [2, 3], "simpli": [2, 5], "macro": 2, "f_1": 2, "e": [2, 3, 4, 5], "harmon": 2, "mean": [2, 3, 4, 5], "precis": 2, "recal": 2, "defin": [2, 3, 4, 5], "2tp": 2, "averag": [2, 4, 5], "each": [2, 3, 4, 5], "categori": 2, "independ": [2, 5], "err_nam": 2, "get": [2, 3, 4, 5], "an": [2, 3, 4, 5], "from": [2, 3, 4, 5], "its": [2, 3, 5], "name": [2, 3, 4, 5], "g": [2, 4, 5], "string": [2, 4, 5], "callabl": [2, 4, 5], "request": [2, 4, 5], "ep": 2, "none": [2, 3, 4, 5], "kullback": [2, 5], "leibler": [2, 5], "diverg": [2, 5], "distribut": [2, 4, 5], "d_": 2, "kl": 2, "log": [2, 4, 5], "factor": 2, "see": [2, 3, 4, 5], "case": [2, 3, 4, 5], "which": [2, 3, 4, 5], "zero": 2, "typic": [2, 3, 4, 5], "set": [2, 3, 4, 5], "2t": 2, "t": [2, 3, 5], "size": [2, 3, 4, 5], "If": [2, 4, 5], "taken": [2, 3, 4, 5], "environ": [2, 5], "variabl": [2, 4], "sample_s": [2, 5], "ha": [2, 3, 4, 5], 
"thu": [2, 3, 5], "beforehand": 2, "across": [2, 5], "pair": 2, "n_sampl": [2, 3], "normal": [2, 4, 5], "rel": [2, 4, 5], "squar": [2, 5], "z_": 2, "2": [2, 4, 5], "min_": [2, 5], "math": [2, 5], "2frac": 2, "underlin": 2, "displaystyl": 2, "model": [2, 3, 4, 5], "error_metr": 2, "union": [2, 4, 5], "str": [2, 4, 5], "aggr_speedup": 2, "bool": [2, 3, 5], "auto": 2, "verbos": [2, 3, 4, 5], "accord": [2, 3, 4, 5], "specif": [2, 5], "gener": [2, 3, 4, 5], "one": [2, 4, 5], "metric": [2, 5], "instanc": [2, 3, 4, 5], "object": [2, 3, 4, 5], "also": [2, 3, 5], "speed": [2, 5], "up": [2, 3, 5], "can": [2, 4, 5], "run": [2, 4, 5], "charg": [2, 4], "repres": [2, 4, 5], "": [2, 3, 4, 5], "qp": [2, 4, 5], "itself": [2, 5], "whether": [2, 3, 4, 5], "appli": [2, 3, 4, 5], "forc": 2, "even": 2, "number": [2, 3, 4, 5], "origin": [2, 4, 5], "collect": [2, 3, 4, 5], "act": 2, "larger": [2, 4, 5], "than": [2, 3, 4, 5], "default": [2, 3, 4, 5], "let": [2, 5], "decid": [2, 4], "conveni": 2, "deactiv": 2, "boolean": [2, 4, 5], "show": [2, 3, 4, 5], "inform": [2, 3, 4, 5], "stdout": 2, "score": [2, 3, 4, 5], "singl": [2, 5], "float": [2, 3, 4, 5], "iter": [2, 4, 5], "given": [2, 3, 4, 5], "list": [2, 3, 4, 5], "report": [2, 5], "panda": 2, "datafram": 2, "more": [2, 4, 5], "column": [2, 4], "estim": [2, 3, 4, 5], "mani": [2, 5], "have": [2, 4, 5], "been": [2, 3, 4, 5], "indic": [2, 3, 4, 5], "displai": [2, 3], "everi": [2, 5], "via": [2, 3, 5], "central": 2, "all": [2, 3, 4, 5], "process": [2, 4], "endow": 2, "optim": [2, 3, 5], "larg": 2, "onli": [2, 3, 4, 5], "come": [2, 4, 5], "down": [2, 4, 5], "onc": [2, 4], "over": [2, 5], "instead": [2, 4, 5], "raw": [2, 4], "so": [2, 3, 4, 5], "never": 2, "call": [2, 4, 5], "again": 2, "behaviour": 2, "obtain": [2, 3, 5], "carri": [2, 4, 5], "out": [2, 3, 4, 5], "overal": 2, "need": [2, 4, 5], "exce": 2, "undertaken": 2, "issu": [2, 5], "tupl": [2, 4, 5], "true_prev": 2, "estim_prev": 2, "element": [2, 4, 5], "ndarrai": [2, 4, 5], "q": [2, 3, 5], "hellingh": 2, "distanc": [2, 5], "hd": [2, 5], "discret": [2, 5], "k": [2, 3, 4, 5], "bin": [2, 5], "sqrt": [2, 5], "p_i": 2, "q_i": 2, "real": [2, 3, 4, 5], "1e": [2, 3, 5], "20": [2, 5], "topso": [2, 5], "left": [2, 4, 5], "right": [2, 4, 5], "prevalence_estim": 2, "_supportsarrai": 2, "dtype": [2, 4], "_nestedsequ": 2, "int": [2, 4, 5], "complex": 2, "byte": 2, "tpr": [2, 5], "fpr": [2, 5], "adjust": [2, 5], "rate": [2, 3, 5], "might": [2, 4], "rang": [2, 5], "0": [2, 3, 4, 5], "loss": [2, 3, 5], "liter": [2, 5], "ternary_search": 2, "minim": [2, 5], "strategi": 2, "possibl": [2, 5], "scipi": [2, 4], "linear": [2, 5], "problem": [2, 4, 5], "space": [2, 3, 5], "01": [2, 3, 5], "02": 2, "ternari": [2, 5], "yet": 2, "np": [2, 4, 5], "positive_preval": 2, "clip_if_necessari": 2, "helper": 2, "order": [2, 4, 5], "guarante": [2, 4, 5], "result": [2, 5], "valid": [2, 3, 4, 5], "check": 2, "rais": [2, 5], "raise_except": 2, "toleranz": 2, "08": 2, "sum": [2, 5], "otherwis": [2, 4, 5], "project": [2, 5], "proport": [2, 3, 4, 5], "probabl": [2, 3, 5], "perform": [2, 3, 5], "thei": [2, 5], "onto": [2, 5], "simplex": [2, 5], "n_instanc": [2, 3, 5], "correctli": 2, "when": [2, 3, 4, 5], "some": [2, 4, 5], "exampl": [2, 3, 4, 5], "len": 2, "occurr": [2, 4], "receiv": 2, "argument": [2, 4, 5], "That": 2, "alreadi": 2, "tri": [2, 5], "instanti": [2, 3, 5], "correspond": [2, 4, 5], "combinations_budget": 2, "n_repeat": 2, "largest": 2, "equidist": 2, "point": [2, 4, 5], "combin": [2, 5], "dimension": [2, 3, 4, 5], "do": [2, 3, 4, 5], 
"integ": [2, 3, 4, 5], "maximum": [2, 3, 5], "allow": [2, 3, 4, 5], "repetit": 2, "less": [2, 4, 5], "best": [2, 3, 5], "explor": 2, "step": [2, 5], "ineffici": 2, "ad": 2, "complet": [2, 5], "earli": [2, 3, 5], "literatur": 2, "A": [2, 3, 4, 5], "most": [2, 4, 5], "power": 2, "altern": [2, 5], "found": [2, 3, 4, 5], "unnormalized_arr": 2, "code": [2, 3], "adapt": [2, 3], "mathieu": 2, "blondel": 2, "bsd": 2, "licens": 2, "accompani": 2, "paper": [2, 3, 5], "akinori": 2, "fujino": 2, "naonori": 2, "ueda": 2, "scale": [2, 3, 5], "multiclass": [2, 4, 5], "support": [2, 4, 5], "machin": [2, 3], "euclidean": 2, "icpr": 2, "2014": 2, "url": 2, "n": [2, 3, 5], "v": [2, 3, 5], "matrix": [2, 5], "consist": [2, 3, 4, 5], "l1": [2, 5], "convert": [2, 3, 4, 5], "n_prevpoint": 2, "equal": [2, 5], "distant": 2, "calcul": [2, 5], "binom": 2, "c": [2, 3, 4, 5], "time": [2, 4, 5], "r": [2, 4, 5], "mass": 2, "block": 2, "alloc": [2, 3], "solut": [2, 5], "star": 2, "bar": 2, "For": [2, 4, 5], "5": [2, 3, 4, 5], "25": [2, 3, 5], "75": [2, 5], "50": [2, 5], "yield": [2, 4, 5], "smallest": 2, "lost": 2, "constrain": [2, 4], "slsqp": 2, "routin": [2, 4, 5], "posterior": [2, 3, 5], "crisp": [2, 5], "decis": [2, 3, 5], "take": [2, 4, 5], "argmax": 2, "grid_point": 2, "21": 2, "repeat": 2, "smooth_limits_epsilon": 2, "produc": 2, "uniformli": 2, "separ": [2, 4], "By": 2, "05": [2, 5], "limit": [2, 5], "10": [2, 3, 5], "15": [2, 4], "90": 2, "95": 2, "99": 2, "interv": 2, "quantiti": 2, "add": [2, 4], "subtract": [2, 4], "p_c_cond_i": 2, "p_c": [2, 5], "invers": [2, 5], "invari": [2, 5], "ratio": [2, 5], "exact": [2, 4, 5], "solv": [2, 5], "equat": [2, 5], "misclassif": [2, 5], "entri": [2, 5], "being": [2, 5], "belong": [2, 5], "end": [2, 5], "option": [2, 4, 5], "mai": 2, "exist": 2, "degener": 2, "vaz": [2, 5], "et": [2, 3, 4, 5], "al": [2, 3, 4, 5], "replac": [2, 4, 5], "last": [2, 3, 4, 5], "system": [2, 5], "rank": [2, 3, 5], "strictli": [2, 5], "full": [2, 4, 5], "deprec": [2, 5], "alwai": [2, 5], "prec": 2, "3": [2, 3, 4, 5], "represent": [2, 3, 5], "33": 2, "67": 2, "kraemer": 2, "algorithm": [2, 4, 5], "random": [2, 4, 5], "unit": [2, 5], "post": 2, "stackexchang": 2, "question": 2, "3227": 2, "uniform": [2, 4, 5], "_": [2, 4, 5], "param": [2, 3, 5], "msg": 2, "param_grid": [2, 5], "dict": [2, 4, 5], "type": [2, 4, 5], "refit": 2, "n_job": [2, 3, 4, 5], "raise_error": 2, "grid": [2, 5], "target": [2, 3, 5], "orient": [2, 5], "hyperparamet": [2, 5], "dictionari": [2, 3, 4, 5], "kei": [2, 4], "ones": [2, 4, 5], "those": [2, 3, 5], "quantification_error": 2, "whole": [2, 3], "chosen": 2, "ignor": [2, 4, 5], "gen": 2, "establish": 2, "timer": 2, "second": [2, 4], "configur": [2, 5], "test": [2, 3, 4, 5], "whenev": 2, "longer": [2, 5], "timeouterror": 2, "except": [2, 5], "bound": [2, 5], "ani": [2, 3, 4, 5], "mark": 2, "goe": 2, "howev": 2, "valueerror": 2, "through": 2, "after": [2, 5], "hyper": [2, 3, 5], "learn": [2, 3, 4, 5], "select": [2, 4, 5], "self": [2, 3, 4, 5], "deep": [2, 5], "unus": [2, 3], "contanin": 2, "enum": 2, "enumer": 2, "4": [2, 4], "nfold": [2, 4], "akin": [2, 5], "scikit": [2, 3, 4, 5], "fold": [2, 4, 5], "cross": [2, 3, 4, 5], "seed": [2, 4, 5], "reproduc": [2, 4], "expand": 2, "100": [2, 3, 4, 5], "b": [2, 4, 5], "print": [2, 3, 4], "assign": [2, 4], "partit": [2, 3], "anoth": [2, 5], "que": 2, "method_nam": 2, "pos_class": [2, 4], "titl": 2, "nbin": [2, 5], "colormap": 2, "matplotlib": 2, "color": 2, "listedcolormap": 2, "vertical_xtick": 2, "legend": 2, "savepath": 2, "box": 2, 
"local": 2, "bia": [2, 3, 5], "sign": 2, "minu": 2, "differ": [2, 4, 5], "classs": 2, "experi": [2, 4], "compon": [2, 3, 5], "cm": 2, "tab10": 2, "secondari": 2, "path": [2, 3, 4, 5], "save": [2, 4], "shown": 2, "global": 2, "show_std": 2, "train_prev": 2, "method_ord": 2, "diagon": 2, "along": [2, 5], "axi": 2, "describ": [2, 5], "henc": [2, 4, 5], "It": [2, 4], "though": 2, "other": [2, 4, 5], "prefer": 2, "deviat": [2, 4], "band": 2, "inconveni": 2, "compar": 2, "high": [2, 5], "leyend": 2, "hightlight": 2, "conduct": 2, "same": [2, 4, 5], "impos": 2, "associ": 2, "tr_prev": [2, 5], "n_bin": [2, 5], "isomer": 2, "x_error": 2, "y_error": 2, "ttest_alpha": 2, "005": 2, "tail_density_threshold": 2, "top": [2, 5], "region": 2, "shift": [2, 3, 5], "form": [2, 4, 5], "broken": 2, "chart": 2, "either": 2, "follow": [2, 4, 5], "hold": [2, 5], "ii": 2, "statist": [2, 5], "significantli": 2, "side": 2, "confid": 2, "made": [2, 4, 5], "isometr": 2, "percentil": 2, "divid": 2, "amount": [2, 5], "abov": 2, "consid": [2, 3, 4, 5], "involv": 2, "similar": [2, 5], "threshold": [2, 5], "densiti": [2, 5], "below": [2, 4], "tail": 2, "avoid": 2, "outlier": 2, "error_nam": 2, "show_dens": 2, "show_legend": 2, "logscal": 2, "vline": 2, "especi": 2, "cumberson": 2, "gain": 2, "understand": 2, "about": [2, 4, 5], "how": [2, 4, 5], "fare": 2, "prior": [2, 5], "spectrum": 2, "low": [2, 3], "regim": 2, "highlight": 2, "vertic": 2, "dot": 2, "line": 2, "n_preval": 2, "sanity_check": 2, "10000": [2, 5], "sample_prev": 2, "artifici": 2, "drawn": [2, 4], "extract": [2, 4, 5], "copi": [2, 4], "replic": 2, "sequenc": 2, "user": 2, "skip": 2, "labelled_collect": 2, "exhaust": 2, "depend": [2, 5], "11": 2, "9": 2, "implicit": 2, "return_constrained_dim": 2, "rest": [2, 3, 4], "note": [2, 4], "quit": 2, "obvious": 2, "doe": [2, 5], "determinist": 2, "anywher": 2, "multipli": 2, "realiz": 2, "necessari": [2, 5], "abstract": [2, 3, 4, 5], "parent": 2, "known": [2, 5], "procedur": 2, "enforc": 2, "fulli": 2, "In": [2, 3, 4, 5], "make": [2, 5], "extend": [2, 5], "input": [2, 3, 4, 5], "arg": [2, 3, 4, 5], "prepar": 2, "accommod": 2, "desir": [2, 4], "output": [2, 3, 4, 5], "format": [2, 4, 5], "befor": [2, 3, 4, 5], "inherit": 2, "custom": [2, 4], "addit": 2, "adher": 2, "properti": [2, 3, 4, 5], "determin": 2, "serv": [2, 4], "alia": [2, 4, 5], "domaina": 2, "domainb": 2, "mixture_point": 2, "mixtur": [2, 5], "domain": 2, "control": 2, "preserv": [2, 4], "draw": [2, 5], "specifi": [2, 3, 4, 5], "should": [2, 3, 4, 5], "zip": 2, "veri": 2, "simpl": [2, 5], "previous": [2, 5], "natur": 2, "therefor": 2, "approxim": [2, 3], "classmethod": [2, 4, 5], "pre_classif": 2, "in_plac": 2, "modifi": 2, "version": [2, 3], "pre": 2, "advanc": 2, "hard": [2, 3, 5], "modif": 2, "place": [2, 4], "new": [2, 4], "variant": [2, 5], "reli": [2, 5], "cover": [2, 3], "entir": 2, "sens": 2, "unlik": 2, "endeavour": 2, "intract": 2, "patienc": [2, 3, 5], "lower_is_bett": 2, "stop": [2, 3, 5], "network": [2, 3, 4, 5], "epoch": [2, 3, 5], "7": [2, 3, 5], "improv": [2, 3, 5], "best_epoch": 2, "best_scor": 2, "consecut": [2, 3, 4, 5], "monitor": 2, "obtaind": 2, "held": [2, 3, 5], "split": [2, 3, 4, 5], "wors": 2, "far": [2, 3, 4], "flag": 2, "keep": [2, 4], "track": 2, "seen": [2, 5], "wa": [2, 4, 5], "o": 2, "makedir": 2, "exist_ok": 2, "dir": [2, 5], "subdir": 2, "anotherdir": 2, "creat": [2, 5], "file": [2, 3, 4, 5], "txt": 2, "archive_filenam": 2, "download": [2, 4], "destin": 2, "filenam": 2, "dowload": 2, "home": [2, 4], "directori": [2, 3, 4, 
5], "perman": 2, "quapy_data": 2, "func": 2, "slice": 2, "item": 2, "work": [2, 4, 5], "pass": [2, 3, 5], "worker": [2, 3, 4, 5], "asarrai": 2, "backend": [2, 5], "loki": [2, 5], "wrapper": [2, 3, 4, 5], "multiprocess": [2, 5], "delai": 2, "args_i": 2, "silent": [2, 5], "child": 2, "ensur": 2, "numer": [2, 4, 5], "handl": 2, "open_arg": 2, "pickle_path": 2, "generation_func": 2, "fast": [2, 4], "reus": [2, 4], "resourc": 2, "next": [2, 3, 4], "invok": [2, 4], "pickl": [2, 4, 5], "def": 2, "some_arrai": 2, "mock": [2, 3], "rand": 2, "my_arrai": 2, "pkl": 2, "first": [2, 4, 5], "text": [2, 3, 4, 5], "disk": [2, 4], "miss": 2, "context": 2, "tempor": [2, 3], "without": [2, 4], "outer": 2, "numpi": [2, 3], "current": [2, 3, 4, 5], "state": 2, "random_se": 2, "within": [2, 5], "launch": 2, "close": [2, 4, 5], "start_msg": 2, "end_msg": 2, "sleep": 2, "begin": 2, "correct": [3, 5], "temperatur": [3, 5], "bct": [3, 5], "abstent": 3, "alexandari": [3, 5], "stratifi": [3, 4, 5], "retrain": 3, "afterward": [3, 5], "No": [3, 5], "nbv": [3, 5], "re": [3, 4], "kundaj": 3, "shrikumar": 3, "2020": 3, "novemb": 3, "likelihood": [3, 5], "beat": [3, 5], "intern": [3, 4, 5], "confer": [3, 4], "pp": 3, "222": 3, "232": 3, "pmlr": 3, "baseestim": [3, 5], "calibratorfactori": 3, "n_featur": [3, 5], "manner": [3, 5], "val": [3, 4], "These": [3, 5], "n_compon": 3, "kwarg": [3, 4, 5], "embed": [3, 5], "requir": [3, 4, 5], "quanet": [3, 5], "easili": 3, "sklearn": [3, 4, 5], "decomposit": 3, "truncatedsvd": 3, "while": [3, 4, 5], "linear_model": 3, "logisticregress": [3, 5], "princip": 3, "retain": [3, 5], "logist": [3, 5], "regress": 3, "map": [3, 5], "length": [3, 4], "eventu": [3, 4], "unalt": 3, "emb": 3, "embedding_s": 3, "hidden_s": 3, "256": 3, "repr_siz": 3, "kernel_height": 3, "stride": 3, "pad": [3, 4], "drop_p": 3, "convolut": 3, "vocabulari": [3, 4], "word": [3, 4, 5], "hidden": [3, 5], "document": [3, 4, 5], "kernel": [3, 5], "token": [3, 4], "drop": 3, "dropout": [3, 5], "layer": [3, 5], "batch": 3, "torch": [3, 5], "dataload": 3, "tensor": 3, "n_dimens": [3, 5], "lstm_class_nlay": 3, "long": 3, "short": 3, "memori": 3, "lstm": [3, 5], "net": 3, "lr": [3, 5], "001": [3, 5], "weight_decai": 3, "200": 3, "batch_siz": 3, "64": [3, 5], "batch_size_test": 3, "512": [3, 5], "padding_length": 3, "300": 3, "cuda": [3, 5], "checkpointpath": 3, "checkpoint": [3, 5], "classifier_net": 3, "dat": 3, "weight": [3, 4], "decai": 3, "wait": 3, "cpu": [3, 5], "enabl": 3, "gpu": [3, 5], "store": [3, 4, 5], "vocab_s": 3, "reiniti": 3, "trainer": 3, "learner": [3, 5], "disjoint": 3, "embed_s": 3, "nn": 3, "pad_length": 3, "xavier": 3, "initi": [3, 5], "shuffl": [3, 4], "dynam": [3, 4, 5], "longest": 3, "shorter": 3, "svmperf_bas": [3, 5], "host_fold": 3, "classifiermixin": 3, "svm": [3, 4, 5], "perf": [3, 5], "thorsten": 3, "joachim": [3, 5], "patch": [3, 5], "instal": [3, 5], "further": [3, 4, 5], "detail": [3, 4, 5], "refer": [3, 4], "esuli": [3, 4, 5], "2015": [3, 5], "barranquero": [3, 5], "svm_perf_learn": 3, "svm_perf_classifi": 3, "trade": [3, 5], "off": [3, 5], "margin": [3, 5], "std": 3, "avail": [3, 4, 5], "qacc": 3, "qf1": 3, "qgm": 3, "tmp": 3, "automat": 3, "delet": 3, "multivari": 3, "12": 3, "26": 3, "27": 3, "13": 3, "22": [3, 4], "23": 3, "24": 3, "textual": 4, "train_siz": 4, "6": 4, "conform": 4, "nrepeat": 4, "around": [4, 5], "round": 4, "train_path": 4, "test_path": 4, "loader_func": 4, "loader_kwarg": 4, "read": 4, "must": [4, 5], "loader": 4, "n_train": 4, "n_test": 4, "quick": 4, "kindl": [4, 5], 
"tfidf": 4, "min_df": [4, 5], "tr": 4, "3821": 4, "te": 4, "21591": 4, "spars": 4, "csr": 4, "csr_matrix": 4, "featur": [4, 5], "4403": 4, "081": 4, "919": 4, "063": 4, "937": 4, "dedic": 4, "attach": 4, "them": [4, 5], "sever": 4, "infer": 4, "linearsvc": 4, "my_collect": 4, "codefram": 4, "both": 4, "frequenc": [4, 5], "actual": [4, 5], "lead": 4, "empti": 4, "sinc": [4, 5], "met": 4, "whose": [4, 5], "train_prop": 4, "randomli": 4, "stratif": 4, "greater": 4, "single_sample_train": 4, "for_model_select": 4, "data_hom": 4, "ifcb": 4, "zenodo": 4, "pleas": 4, "link": 4, "publicli": 4, "whoi": 4, "plankton": 4, "repo": 4, "script": [4, 5], "gonz\u00e1lez": [4, 5], "basic": [4, 5], "precomput": 4, "togeth": 4, "individu": 4, "30": [4, 5], "86": 4, "286": 4, "dump": 4, "leav": 4, "quay_data": 4, "test_gen": 4, "_ifcb": 4, "ifcbtrainsamplesfromdir": 4, "seri": 4, "ifcbtestsampl": 4, "dataset_nam": 4, "test_split": 4, "uci": 4, "p\u00e9rez": [4, 5], "g\u00e1llego": [4, 5], "quevedo": 4, "j": [4, 5], "del": 4, "coz": 4, "2017": [4, 5], "characteriz": 4, "chang": 4, "studi": 4, "fusion": 4, "34": [4, 5], "87": 4, "castano": 4, "2019": [4, 5], "task": 4, "45": 4, "predefin": 4, "fetch_ucilabelledcollect": 4, "access": [4, 5], "uci_dataset": 4, "ml": [4, 5], "repositori": 4, "adopt": 4, "5fcvx2": 4, "x2": 4, "import": [4, 5], "yeast": 4, "archiv": 4, "ic": 4, "edu": 4, "criteria": 4, "1000": [4, 5], "suit": 4, "ucimlrepo": 4, "dry": 4, "bean": 4, "uci_multiclass_dataset": 4, "offici": 4, "provid": [4, 5], "lequa": 4, "competit": 4, "brief": 4, "t1a": 4, "t1b": 4, "t2a": 4, "t2b": 4, "sentiment": 4, "28": 4, "merchandis": 4, "product": 4, "we": 4, "moreo": [4, 5], "sebastiani": [4, 5], "f": [4, 5], "sperduti": 4, "2022": [4, 5], "overview": 4, "clef": 4, "descript": 4, "lequa2022_experi": 4, "py": 4, "folder": [4, 5], "guid": 4, "val_gen": 4, "_lequa2022": 4, "samplesfromdir": 4, "subclass": [4, 5], "review": 4, "recurr": 4, "proceed": [4, 5], "27th": 4, "acm": [4, 5], "knowledg": 4, "manag": 4, "2018": [2, 4, 5], "reviews_sentiment_dataset": 4, "hp": 4, "imdb": 4, "matric": 4, "minimun": 4, "kept": 4, "faster": 4, "subsequ": 4, "twitter": 4, "gao": [4, 5], "w": 4, "tweet": 4, "analysi": 4, "social": 4, "mining6": 4, "19": 4, "2016": [4, 5], "semeval13": 4, "semeval14": 4, "semeval15": 4, "share": 4, "twitter_sentiment_datasets_train": 4, "twitter_sentiment_datasets_test": 4, "gasp": 4, "hcr": 4, "omd": 4, "sander": 4, "semeval16": 4, "sst": 4, "wb": 4, "devel": 4, "style": 4, "id": 4, "would": [4, 5], "countvector": 4, "keyword": [4, 5], "nogap": 4, "regardless": 4, "special": 4, "codifi": 4, "unknown": 4, "surfac": 4, "assert": 4, "gap": 4, "preced": 4, "inplac": [4, 5], "To": 4, "uniqu": 4, "rare": 4, "occur": 4, "unk": 4, "minimum": [4, 5], "org": [4, 5], "stabl": 4, "feature_extract": 4, "html": [4, 5], "subtyp": 4, "spmatrix": 4, "remov": [4, 5], "present": 4, "least": 4, "infrequ": 4, "aka": [4, 5], "z": 4, "sublinear_tf": 4, "part": 4, "scall": 4, "tf": 4, "counter": 4, "tfidfvector": 4, "categor": 4, "toward": [4, 5], "whcih": 4, "had": 4, "encod": 4, "utf": 4, "8": [4, 5], "csv": 4, "feat1": 4, "feat2": 4, "featn": 4, "covari": [4, 5], "express": 4, "col": 4, "row": 4, "class2int": 4, "collet": 4, "fomart": 4, "progress": 4, "sentenc": 4, "classnam": 4, "u1": 4, "springer": [], "articl": [], "1007": [], "s10618": [], "008": [], "0097": [], "invert": 5, "l2": 5, "norm": 5, "ax": 5, "better": 5, "consult": 5, "buns": 5, "On": 5, "multi": 5, "extens": 5, "2nd": 5, "workshop": 5, "applic": 5, 
"lq": 5, "ecml": 5, "pkdd": 5, "grenobl": 5, "franc": 5, "classif_predict": 5, "y_": 5, "construct": 5, "jmlr": 5, "v20": 5, "18": 5, "456": 5, "abc": 5, "base_quantifi": 5, "median": 5, "parameter": 5, "parllel": 5, "subobject": 5, "well": 5, "nest": 5, "pipelin": 5, "latter": 5, "__": 5, "updat": 5, "reason": 5, "phase": 5, "classification_fit": 5, "maintain": 5, "attribut": 5, "give": 5, "fit_classifi": 5, "predict_on": 5, "outsid": 5, "remaind": 5, "expect": 5, "non": 5, "soft": 5, "num_warmup": 5, "500": 5, "num_sampl": 5, "mcmc_seed": 5, "bayesian": 5, "rather": 5, "diagnos": 5, "degeneraci": 5, "visibl": 5, "confus": 5, "uncertainti": 5, "extra": 5, "bay": 5, "warmup": 5, "mcmc": 5, "sampler": 5, "One": 5, "noth": 5, "here": 5, "cdf": 5, "match": 5, "helling": 5, "sought": 5, "choic": 5, "channel": 5, "proper": 5, "ch": 5, "particular": 5, "di": 5, "dij": 5, "fraction": 5, "th": 5, "tol": 5, "find": 5, "got": 5, "dl": 5, "doi": 5, "1145": 5, "3219819": 5, "3220059": 5, "histogram": 5, "toler": 5, "classif_posterior": 5, "exact_train_prev": 5, "recalib": 5, "maxim": 5, "saeren": 5, "latinn": 5, "decaesteck": 5, "mutual": 5, "recurs": 5, "wai": 5, "until": 5, "converg": 5, "heurist": 5, "propos": 5, "recalibr": 5, "meant": 5, "messag": 5, "observ": 5, "posterior_prob": 5, "0001": 5, "reach": 5, "loop": 5, "ir": 5, "accordingli": 5, "unlabel": 5, "binary_quantifi": 5, "parallel_backend": 5, "prevel": 5, "emploi": 5, "joblib": 5, "help": 5, "elm": 5, "cannot": 5, "temp": 5, "dure": 5, "resp": 5, "simplif": 5, "conceptu": 5, "equival": 5, "explicit": 5, "famili": 5, "structur": 5, "purpos": 5, "svmperf_hom": 5, "properli": 5, "underli": 5, "2021": 5, "_kdei": 5, "common": 5, "ancestor": 5, "kde": 5, "scott": 5, "silverman": 5, "bandwidth": 5, "wrap": 5, "kerneldens": 5, "evalut": 5, "kdei": 5, "cauchi": 5, "schwarz": 5, "author": 5, "mont": 5, "carlo": 5, "approach": 5, "alpha": 5, "delta": 5, "d": 5, "boldsymbol": 5, "q_": 5, "widetild": 5, "u": 5, "p_": 5, "alpha_i": 5, "l": 5, "_i": 5, "p_x": 5, "x_i": 5, "h": 5, "datapoint": 5, "center": 5, "mathrm": 5, "dx": 5, "2dx": 5, "admit": 5, "montecarlo_tri": 5, "disntac": 5, "_f": 5, "trial": 5, "x_1": 5, "ldot": 5, "x_t": 5, "sim_": 5, "iid": 5, "criterion": 5, "mathbb": 5, "_neural": 5, "doc_embedding_s": 5, "stats_siz": 5, "lstm_hidden_s": 5, "lstm_nlayer": 5, "ff_layer": 5, "1024": 5, "bidirect": 5, "qdrop_p": 5, "order_bi": 5, "cell": 5, "dens": 5, "connect": 5, "ff": 5, "sort": 5, "doc_embed": 5, "doc_posterior": 5, "overridden": 5, "although": 5, "recip": 5, "former": 5, "care": 5, "regist": 5, "hook": 5, "n_epoch": 5, "tr_iter_per_poch": 5, "va_iter_per_poch": 5, "checkpointdir": 5, "checkpointnam": 5, "pytorch": 5, "advantag": 5, "cnn": 5, "estim_preval": 5, "anyth": 5, "40": 5, "66": 5, "ground": 5, "truth": 5, "_threshold_optim": 5, "forman": 5, "2006": 5, "2008": 5, "look": 5, "goal": 5, "bring": 5, "stabil": 5, "denomin": 5, "sweep": 5, "closest": 5, "choos": 5, "deliv": 5, "interpret": 5, "complement": 5, "param_mod_sel": 5, "param_model_sel": 5, "red_siz": 5, "min_po": 5, "polici": 5, "av": 5, "max_sample_s": 5, "ptr": 5, "member": 5, "preliminari": 5, "final": 5, "recomput": 5, "static": 5, "compat": 5, "recommend": 5, "gridsearchcv": 5, "base_quantifier_class": 5, "factori": 5, "unifi": 5, "interfac": 5, "logspac": 5, "class_weight": 5, "balanc": 5, "110": 5, "setup": 5, "mimick": 5, "castro": 5, "alaiz": 5, "rodr\u00edguez": 5, "alegr": 5, "2013": 5, "nfeat": 5, "dissimilar": 5, "mlpe": 5, "lazi": 5, "assum": 5, "put": 5, 
"assumpion": 5, "irrespect": 5, "lower": 5, "estimant": 5, "bootstrap_tri": 5, "bootstrap_rang": 5, "bagging_tri": 5, "bagging_rang": 5, "vectorizer_kwarg": 5, "class_cond_x": 5, "hat_yi": 5, "yj": 5, "yi": 5}, "objects": {"": [[2, 0, 0, "-", "quapy"]], "quapy": [[3, 0, 0, "-", "classification"], [4, 0, 0, "-", "data"], [2, 0, 0, "-", "error"], [2, 0, 0, "-", "evaluation"], [2, 0, 0, "-", "functional"], [5, 0, 0, "-", "method"], [2, 0, 0, "-", "model_selection"], [2, 0, 0, "-", "plot"], [2, 0, 0, "-", "protocol"], [2, 0, 0, "-", "util"]], "quapy.classification": [[3, 0, 0, "-", "calibration"], [3, 0, 0, "-", "methods"], [3, 0, 0, "-", "neural"], [3, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[3, 1, 1, "", "BCTSCalibration"], [3, 1, 1, "", "NBVSCalibration"], [3, 1, 1, "", "RecalibratedProbabilisticClassifier"], [3, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [3, 1, 1, "", "TSCalibration"], [3, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[3, 2, 1, "", "classes_"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "fit_cv"], [3, 3, 1, "", "fit_tr_val"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[3, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural": [[3, 1, 1, "", "CNNnet"], [3, 1, 1, "", "LSTMnet"], [3, 1, 1, "", "NeuralClassifierTrainer"], [3, 1, 1, "", "TextClassifierNet"], [3, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[3, 2, 1, "", "device"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "reset_net_params"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[3, 3, 1, "", "dimensions"], [3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "forward"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict_proba"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"], [3, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[3, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[3, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[3, 3, 1, "", "decision_function"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "predict"], [3, 4, 1, "", "valid_losses"]], "quapy.data": [[4, 0, 0, "-", "base"], [4, 0, 0, "-", "datasets"], [4, 0, 0, "-", "preprocessing"], [4, 0, 0, "-", "reader"]], "quapy.data.base": [[4, 1, 1, "", "Dataset"], [4, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[4, 3, 1, "", "SplitStratified"], [4, 2, 1, "", "binary"], [4, 2, 1, "", "classes_"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 3, 1, "", "reduce"], [4, 3, 1, "", "stats"], [4, 2, 1, "", "train_test"], [4, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[4, 2, 1, "", "X"], [4, 2, 1, "", "Xp"], [4, 2, 1, "", "Xy"], [4, 2, 1, "", "binary"], [4, 3, 
1, "", "counts"], [4, 3, 1, "", "join"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 2, 1, "", "p"], [4, 3, 1, "", "prevalence"], [4, 3, 1, "", "sampling"], [4, 3, 1, "", "sampling_from_index"], [4, 3, 1, "", "sampling_index"], [4, 3, 1, "", "split_random"], [4, 3, 1, "", "split_stratified"], [4, 3, 1, "", "stats"], [4, 3, 1, "", "uniform_sampling"], [4, 3, 1, "", "uniform_sampling_index"], [4, 2, 1, "", "y"]], "quapy.data.datasets": [[4, 5, 1, "", "fetch_IFCB"], [4, 5, 1, "", "fetch_UCIBinaryDataset"], [4, 5, 1, "", "fetch_UCIBinaryLabelledCollection"], [4, 5, 1, "", "fetch_UCIMulticlassDataset"], [4, 5, 1, "", "fetch_UCIMulticlassLabelledCollection"], [4, 5, 1, "", "fetch_lequa2022"], [4, 5, 1, "", "fetch_reviews"], [4, 5, 1, "", "fetch_twitter"], [4, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[4, 1, 1, "", "IndexTransformer"], [4, 5, 1, "", "index"], [4, 5, 1, "", "reduce_columns"], [4, 5, 1, "", "standardize"], [4, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[4, 3, 1, "", "add_word"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "fit_transform"], [4, 3, 1, "", "transform"], [4, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[4, 5, 1, "", "binarize"], [4, 5, 1, "", "from_csv"], [4, 5, 1, "", "from_sparse"], [4, 5, 1, "", "from_text"], [4, 5, 1, "", "reindex_labels"]], "quapy.error": [[2, 5, 1, "", "absolute_error"], [2, 5, 1, "", "acc_error"], [2, 5, 1, "", "acce"], [2, 5, 1, "", "ae"], [2, 5, 1, "", "f1_error"], [2, 5, 1, "", "f1e"], [2, 5, 1, "", "from_name"], [2, 5, 1, "", "kld"], [2, 5, 1, "", "mae"], [2, 5, 1, "", "mean_absolute_error"], [2, 5, 1, "", "mean_normalized_absolute_error"], [2, 5, 1, "", "mean_normalized_relative_absolute_error"], [2, 5, 1, "", "mean_relative_absolute_error"], [2, 5, 1, "", "mkld"], [2, 5, 1, "", "mnae"], [2, 5, 1, "", "mnkld"], [2, 5, 1, "", "mnrae"], [2, 5, 1, "", "mrae"], [2, 5, 1, "", "mse"], [2, 5, 1, "", "nae"], [2, 5, 1, "", "nkld"], [2, 5, 1, "", "normalized_absolute_error"], [2, 5, 1, "", "normalized_relative_absolute_error"], [2, 5, 1, "", "nrae"], [2, 5, 1, "", "rae"], [2, 5, 1, "", "relative_absolute_error"], [2, 5, 1, "", "se"], [2, 5, 1, "", "smooth"]], "quapy.evaluation": [[2, 5, 1, "", "evaluate"], [2, 5, 1, "", "evaluate_on_samples"], [2, 5, 1, "", "evaluation_report"], [2, 5, 1, "", "prediction"]], "quapy.functional": [[2, 5, 1, "", "HellingerDistance"], [2, 5, 1, "", "TopsoeDistance"], [2, 5, 1, "", "adjusted_quantification"], [2, 5, 1, "", "argmin_prevalence"], [2, 5, 1, "", "as_binary_prevalence"], [2, 5, 1, "", "check_prevalence_vector"], [2, 5, 1, "", "clip_prevalence"], [2, 5, 1, "", "counts_from_labels"], [2, 5, 1, "", "get_divergence"], [2, 5, 1, "", "get_nprevpoints_approximation"], [2, 5, 1, "", "linear_search"], [2, 5, 1, "", "map_onto_probability_simplex"], [2, 5, 1, "", "normalize_prevalence"], [2, 5, 1, "", "num_prevalence_combinations"], [2, 5, 1, "", "optim_minimize"], [2, 5, 1, "", "prevalence_from_labels"], [2, 5, 1, "", "prevalence_from_probabilities"], [2, 5, 1, "", "prevalence_linspace"], [2, 5, 1, "", "solve_adjustment"], [2, 5, 1, "", "strprev"], [2, 5, 1, "", "uniform_prevalence_sampling"], [2, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[5, 0, 0, "-", "_kdey"], [5, 0, 0, "-", "_neural"], [5, 0, 0, "-", "_threshold_optim"], [5, 0, 0, "-", "aggregative"], [5, 0, 0, "-", "base"], [5, 0, 0, "-", "meta"], [5, 0, 0, "-", "non_aggregative"]], "quapy.method._kdey": [[5, 1, 1, "", "KDEBase"], [5, 1, 1, "", "KDEyCS"], [5, 1, 1, "", "KDEyHD"], 
[5, 1, 1, "", "KDEyML"]], "quapy.method._kdey.KDEBase": [[5, 4, 1, "", "BANDWIDTH_METHOD"], [5, 3, 1, "", "get_kde_function"], [5, 3, 1, "", "get_mixture_components"], [5, 3, 1, "", "pdf"]], "quapy.method._kdey.KDEyCS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "gram_matrix_mix_sum"]], "quapy.method._kdey.KDEyHD": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._kdey.KDEyML": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._neural": [[5, 1, 1, "", "QuaNetModule"], [5, 1, 1, "", "QuaNetTrainer"], [5, 5, 1, "", "mae_loss"]], "quapy.method._neural.QuaNetModule": [[5, 2, 1, "", "device"], [5, 3, 1, "", "forward"], [5, 4, 1, "", "training"]], "quapy.method._neural.QuaNetTrainer": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "clean_checkpoint"], [5, 3, 1, "", "clean_checkpoint_dir"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method._threshold_optim": [[5, 1, 1, "", "MAX"], [5, 1, 1, "", "MS"], [5, 1, 1, "", "MS2"], [5, 1, 1, "", "T50"], [5, 1, 1, "", "ThresholdOptimization"], [5, 1, 1, "", "X"]], "quapy.method._threshold_optim.MAX": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS2": [[5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.T50": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.ThresholdOptimization": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregate_with_threshold"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"], [5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.X": [[5, 3, 1, "", "condition"]], "quapy.method.aggregative": [[5, 1, 1, "", "ACC"], [5, 4, 1, "", "AdjustedClassifyAndCount"], [5, 1, 1, "", "AggregativeCrispQuantifier"], [5, 1, 1, "", "AggregativeMedianEstimator"], [5, 1, 1, "", "AggregativeQuantifier"], [5, 1, 1, "", "AggregativeSoftQuantifier"], [5, 1, 1, "", "BayesianCC"], [5, 1, 1, "", "BinaryAggregativeQuantifier"], [5, 1, 1, "", "CC"], [5, 4, 1, "", "ClassifyAndCount"], [5, 1, 1, "", "DMy"], [5, 4, 1, "", "DistributionMatchingY"], [5, 1, 1, "", "DyS"], [5, 1, 1, "", "EMQ"], [5, 4, 1, "", "ExpectationMaximizationQuantifier"], [5, 1, 1, "", "HDy"], [5, 4, 1, "", "HellingerDistanceY"], [5, 1, 1, "", "OneVsAllAggregative"], [5, 1, 1, "", "PACC"], [5, 1, 1, "", "PCC"], [5, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [5, 4, 1, "", "ProbabilisticClassifyAndCount"], [5, 4, 1, "", "SLD"], [5, 1, 1, "", "SMM"], [5, 5, 1, "", "newELM"], [5, 5, 1, "", "newSVMAE"], [5, 5, 1, "", "newSVMKLD"], [5, 5, 1, "", "newSVMQ"], [5, 5, 1, "", "newSVMRAE"]], "quapy.method.aggregative.ACC": [[5, 4, 1, "", "CLIPPING"], [5, 4, 1, "", "METHODS"], [5, 4, 1, "", "SOLVERS"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"], [5, 3, 1, "", "newInvariantRatioEstimation"]], "quapy.method.aggregative.AggregativeMedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.aggregative.AggregativeQuantifier": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 2, 1, "", "classes_"], [5, 2, 1, "", "classifier"], [5, 3, 1, "", "classifier_fit_predict"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 2, 1, "", "val_split"], [5, 4, 1, "", "val_split_"]], 
"quapy.method.aggregative.BayesianCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "get_conditional_probability_samples"], [5, 3, 1, "", "get_prevalence_samples"], [5, 3, 1, "", "sample_from_posterior"]], "quapy.method.aggregative.BinaryAggregativeQuantifier": [[5, 3, 1, "", "fit"], [5, 2, 1, "", "neg_label"], [5, 2, 1, "", "pos_label"]], "quapy.method.aggregative.CC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DMy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DyS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.EMQ": [[5, 3, 1, "", "EM"], [5, 3, 1, "", "EMQ_BCTS"], [5, 4, 1, "", "EPSILON"], [5, 4, 1, "", "MAX_ITER"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.SMM": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.base": [[5, 1, 1, "", "BaseQuantifier"], [5, 1, 1, "", "BinaryQuantifier"], [5, 1, 1, "", "OneVsAll"], [5, 1, 1, "", "OneVsAllGeneric"], [5, 5, 1, "", "newOneVsAll"]], "quapy.method.base.BaseQuantifier": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.meta": [[5, 5, 1, "", "EACC"], [5, 5, 1, "", "ECC"], [5, 5, 1, "", "EEMQ"], [5, 5, 1, "", "EHDy"], [5, 5, 1, "", "EPACC"], [5, 1, 1, "", "Ensemble"], [5, 1, 1, "", "MedianEstimator"], [5, 1, 1, "", "MedianEstimator2"], [5, 5, 1, "", "ensembleFactory"], [5, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[5, 4, 1, "", "VALID_POLICIES"], [5, 2, 1, "", "aggregative"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 2, 1, "", "probabilistic"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator2": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[5, 1, 1, "", "DMx"], [5, 4, 1, "", "DistributionMatchingX"], [5, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"], [5, 1, 1, "", "ReadMe"]], "quapy.method.non_aggregative.DMx": [[5, 3, 1, "", "HDx"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.ReadMe": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "std_constrained_linear_ls"]], "quapy.model_selection": [[2, 1, 1, "", "ConfigStatus"], [2, 1, 1, "", "GridSearchQ"], [2, 1, 1, "", "Status"], [2, 5, 1, "", "cross_val_predict"], [2, 5, 1, "", "expand_grid"], [2, 5, 1, "", "group_params"]], "quapy.model_selection.ConfigStatus": [[2, 3, 1, "", "failed"], [2, 3, 1, "", "success"]], "quapy.model_selection.GridSearchQ": [[2, 3, 
1, "", "best_model"], [2, 3, 1, "", "fit"], [2, 3, 1, "", "get_params"], [2, 3, 1, "", "quantify"], [2, 3, 1, "", "set_params"]], "quapy.model_selection.Status": [[2, 4, 1, "", "ERROR"], [2, 4, 1, "", "INVALID"], [2, 4, 1, "", "SUCCESS"], [2, 4, 1, "", "TIMEOUT"]], "quapy.plot": [[2, 5, 1, "", "binary_bias_bins"], [2, 5, 1, "", "binary_bias_global"], [2, 5, 1, "", "binary_diagonal"], [2, 5, 1, "", "brokenbar_supremacy_by_drift"], [2, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[2, 1, 1, "", "APP"], [2, 1, 1, "", "AbstractProtocol"], [2, 1, 1, "", "AbstractStochasticSeededProtocol"], [2, 4, 1, "", "ArtificialPrevalenceProtocol"], [2, 1, 1, "", "DomainMixer"], [2, 1, 1, "", "IterateProtocol"], [2, 1, 1, "", "NPP"], [2, 4, 1, "", "NaturalPrevalenceProtocol"], [2, 1, 1, "", "OnLabelledCollectionProtocol"], [2, 1, 1, "", "UPP"], [2, 4, 1, "", "UniformPrevalenceProtocol"]], "quapy.protocol.APP": [[2, 3, 1, "", "prevalence_grid"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[2, 3, 1, "", "collator"], [2, 2, 1, "", "random_state"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.IterateProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.NPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[2, 4, 1, "", "RETURN_TYPES"], [2, 3, 1, "", "get_collator"], [2, 3, 1, "", "get_labelled_collection"], [2, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.UPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.util": [[2, 1, 1, "", "EarlyStop"], [2, 5, 1, "", "create_if_not_exist"], [2, 5, 1, "", "create_parent_dir"], [2, 5, 1, "", "download_file"], [2, 5, 1, "", "download_file_if_not_exists"], [2, 5, 1, "", "get_quapy_home"], [2, 5, 1, "", "map_parallel"], [2, 5, 1, "", "parallel"], [2, 5, 1, "", "parallel_unpack"], [2, 5, 1, "", "pickled_resource"], [2, 5, 1, "", "save_text_file"], [2, 5, 1, "", "temp_seed"], [2, 5, 1, "", "timeout"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"welcom": 0, "quapi": [0, 1, 2, 3, 4, 5], "": 0, "document": 0, "instal": 0, "github": 0, "content": [0, 2, 3, 4, 5], "indic": 0, "tabl": 0, "packag": [2, 3, 4, 5], "subpackag": 2, "submodul": [2, 3, 4, 5], "error": 2, "modul": [2, 3, 4, 5], "evalu": 2, "function": 2, "model_select": 2, "plot": 2, "protocol": 2, "util": 2, "classif": 3, "calibr": 3, "method": [3, 5], "neural": 3, "svmperf": 3, "data": 4, "base": [4, 5], "dataset": 4, "preprocess": 4, "reader": 4, "aggreg": 5, "meta": 5, "non_aggreg": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, 
"alltitles": {"Welcome to QuaPy\u2019s documentation!": [[0, "welcome-to-quapy-s-documentation"]], "Installation": [[0, "installation"]], "GitHub": [[0, "github"]], "Contents": [[0, "contents"]], "Indices and tables": [[0, "indices-and-tables"]], "quapy": [[1, "quapy"]], "Submodules": [[3, "submodules"], [4, "submodules"], [2, "submodules"], [5, "submodules"]], "Module contents": [[3, "module-quapy.classification"], [4, "module-quapy.data"], [2, "module-quapy"], [5, "module-quapy.method"]], "quapy.classification package": [[3, "quapy-classification-package"]], "quapy.classification.calibration module": [[3, "module-quapy.classification.calibration"]], "quapy.classification.methods module": [[3, "module-quapy.classification.methods"]], "quapy.classification.neural module": [[3, "module-quapy.classification.neural"]], "quapy.classification.svmperf module": [[3, "module-quapy.classification.svmperf"]], "quapy.data package": [[4, "quapy-data-package"]], "quapy.data.base module": [[4, "module-quapy.data.base"]], "quapy.data.datasets module": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing module": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader module": [[4, "module-quapy.data.reader"]], "quapy package": [[2, "quapy-package"]], "Subpackages": [[2, "subpackages"]], "quapy.error module": [[2, "module-quapy.error"]], "quapy.evaluation module": [[2, "module-quapy.evaluation"]], "quapy.functional module": [[2, "module-quapy.functional"]], "quapy.model_selection module": [[2, "module-quapy.model_selection"]], "quapy.plot module": [[2, "module-quapy.plot"]], "quapy.protocol module": [[2, "module-quapy.protocol"]], "quapy.util module": [[2, "module-quapy.util"]], "quapy.method package": [[5, "quapy-method-package"]], "quapy.method.aggregative module": [[5, "module-quapy.method.aggregative"]], "quapy.method.base module": [[5, "module-quapy.method.base"]], "quapy.method.meta module": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative module": [[5, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[2, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol"]], "artificialprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.ArtificialPrevalenceProtocol"]], "configstatus (class in quapy.model_selection)": [[2, "quapy.model_selection.ConfigStatus"]], "domainmixer (class in quapy.protocol)": [[2, "quapy.protocol.DomainMixer"]], "error (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.ERROR"]], "earlystop (class in quapy.util)": [[2, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[2, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[2, "quapy.functional.HellingerDistance"]], "invalid (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.INVALID"]], "iterateprotocol (class in quapy.protocol)": [[2, "quapy.protocol.IterateProtocol"]], "npp (class in quapy.protocol)": [[2, "quapy.protocol.NPP"]], "naturalprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.NaturalPrevalenceProtocol"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[2, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[2, 
"quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "success (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.SUCCESS"]], "status (class in quapy.model_selection)": [[2, "quapy.model_selection.Status"]], "timeout (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.TIMEOUT"]], "topsoedistance() (in module quapy.functional)": [[2, "quapy.functional.TopsoeDistance"]], "upp (class in quapy.protocol)": [[2, "quapy.protocol.UPP"]], "uniformprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.UniformPrevalenceProtocol"]], "absolute_error() (in module quapy.error)": [[2, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[2, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[2, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[2, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[2, "quapy.error.ae"]], "argmin_prevalence() (in module quapy.functional)": [[2, "quapy.functional.argmin_prevalence"]], "as_binary_prevalence() (in module quapy.functional)": [[2, "quapy.functional.as_binary_prevalence"]], "best_model() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[2, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[2, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[2, "quapy.functional.check_prevalence_vector"]], "clip_prevalence() (in module quapy.functional)": [[2, "quapy.functional.clip_prevalence"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "counts_from_labels() (in module quapy.functional)": [[2, "quapy.functional.counts_from_labels"]], "create_if_not_exist() (in module quapy.util)": [[2, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[2, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[2, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[2, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[2, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[2, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate"]], "evaluate_on_samples() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate_on_samples"]], "evaluation_report() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluation_report"]], "expand_grid() (in module quapy.model_selection)": [[2, "quapy.model_selection.expand_grid"]], "f1_error() (in module quapy.error)": [[2, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[2, "quapy.error.f1e"]], "failed() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.failed"]], "fit() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[2, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[2, 
"quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_divergence() (in module quapy.functional)": [[2, "quapy.functional.get_divergence"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[2, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[2, "quapy.util.get_quapy_home"]], "group_params() (in module quapy.model_selection)": [[2, "quapy.model_selection.group_params"]], "kld() (in module quapy.error)": [[2, "quapy.error.kld"]], "linear_search() (in module quapy.functional)": [[2, "quapy.functional.linear_search"]], "mae() (in module quapy.error)": [[2, "quapy.error.mae"]], "map_onto_probability_simplex() (in module quapy.functional)": [[2, "quapy.functional.map_onto_probability_simplex"]], "map_parallel() (in module quapy.util)": [[2, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_absolute_error"]], "mean_normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_absolute_error"]], "mean_normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_relative_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[2, "quapy.error.mkld"]], "mnae() (in module quapy.error)": [[2, "quapy.error.mnae"]], "mnkld() (in module quapy.error)": [[2, "quapy.error.mnkld"]], "mnrae() (in module quapy.error)": [[2, "quapy.error.mnrae"]], "module": [[2, "module-quapy"], [2, "module-quapy.error"], [2, "module-quapy.evaluation"], [2, "module-quapy.functional"], [2, "module-quapy.model_selection"], [2, "module-quapy.plot"], [2, "module-quapy.protocol"], [2, "module-quapy.util"], [5, "module-quapy.method"], [5, "module-quapy.method._kdey"], [5, "module-quapy.method._neural"], [5, "module-quapy.method._threshold_optim"], [5, "module-quapy.method.aggregative"], [5, "module-quapy.method.base"], [5, "module-quapy.method.meta"], [5, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[2, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[2, "quapy.error.mse"]], "nae() (in module quapy.error)": [[2, "quapy.error.nae"]], "nkld() (in module quapy.error)": [[2, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[2, "quapy.functional.normalize_prevalence"]], "normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_absolute_error"]], "normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_relative_absolute_error"]], "nrae() (in module quapy.error)": [[2, "quapy.error.nrae"]], "num_prevalence_combinations() (in module quapy.functional)": [[2, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "optim_minimize() (in module quapy.functional)": [[2, "quapy.functional.optim_minimize"]], "parallel() (in module quapy.util)": [[2, "quapy.util.parallel"]], "parallel_unpack() (in module quapy.util)": [[2, "quapy.util.parallel_unpack"]], "pickled_resource() (in module quapy.util)": [[2, 
"quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[2, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[2, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[2, "module-quapy"]], "quapy.error": [[2, "module-quapy.error"]], "quapy.evaluation": [[2, "module-quapy.evaluation"]], "quapy.functional": [[2, "module-quapy.functional"]], "quapy.model_selection": [[2, "module-quapy.model_selection"]], "quapy.plot": [[2, "module-quapy.plot"]], "quapy.protocol": [[2, "module-quapy.protocol"]], "quapy.util": [[2, "module-quapy.util"]], "rae() (in module quapy.error)": [[2, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[2, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[2, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[2, "quapy.error.smooth"]], "solve_adjustment() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment"]], "strprev() (in module quapy.functional)": [[2, "quapy.functional.strprev"]], "success() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.success"]], "temp_seed() (in module quapy.util)": [[2, "quapy.util.temp_seed"]], "timeout() (in module quapy.util)": [[2, "quapy.util.timeout"]], "total() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[2, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.iterateprotocol method)": [[2, "quapy.protocol.IterateProtocol.total"]], "total() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.total"]], "total() 
(quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_simplex_sampling"]], "acc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativecrispquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeCrispQuantifier"]], "aggregativemedianestimator (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator"]], "aggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeQuantifier"]], "aggregativesoftquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeSoftQuantifier"]], "bandwidth_method (quapy.method._kdey.kdebase attribute)": [[5, "quapy.method._kdey.KDEBase.BANDWIDTH_METHOD"]], "basequantifier (class in quapy.method.base)": [[5, "quapy.method.base.BaseQuantifier"]], "bayesiancc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BayesianCC"]], "binaryaggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[5, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.CC"]], "clipping (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.CLIPPING"]], "classifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ClassifyAndCount"]], "dmx (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DMx"]], "dmy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DMy"]], "distributionmatchingx (in module quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DistributionMatchingX"]], "distributionmatchingy (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.DistributionMatchingY"]], "dys (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[5, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[5, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[5, "quapy.method.meta.EHDy"]], "em() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.EMQ"]], "emq_bcts() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EMQ_BCTS"]], "epacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[5, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "hdx() (quapy.method.non_aggregative.dmx class method)": [[5, "quapy.method.non_aggregative.DMx.HDx"]], "hdy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[5, 
"quapy.method.aggregative.HellingerDistanceY"]], "kdebase (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEBase"]], "kdeycs (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyCS"]], "kdeyhd (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyHD"]], "kdeyml (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyML"]], "max (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.MAX_ITER"]], "methods (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.METHODS"]], "ms (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS"]], "ms2 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "medianestimator (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator"]], "medianestimator2 (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator2"]], "onevsall (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetModule"]], "quanettrainer (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetTrainer"]], "readme (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.ReadMe"]], "sld (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.SMM"]], "solvers (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.SOLVERS"]], "t50 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.T50"]], "thresholdoptimization (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[5, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.X"]], "aggregate() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregate"]], "aggregate() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregate"]], "aggregate() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregate"]], "aggregate() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregate"]], "aggregate() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate"]], "aggregate() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() 
(quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregate"]], "aggregate_with_threshold() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold"]], "aggregation_fit() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregation_fit"]], "aggregative (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method._neural.quanettrainer property)": [[5, "quapy.method._neural.QuaNetTrainer.classes_"]], "classes_ (quapy.method.aggregative.aggregativequantifier 
property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[5, "quapy.method.base.OneVsAllGeneric.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classifier_fit_predict() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "clean_checkpoint() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir"]], "condition() (quapy.method._threshold_optim.max method)": [[5, "quapy.method._threshold_optim.MAX.condition"]], "condition() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.condition"]], "condition() (quapy.method._threshold_optim.t50 method)": [[5, "quapy.method._threshold_optim.T50.condition"]], "condition() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.condition"]], "condition() (quapy.method._threshold_optim.x method)": [[5, "quapy.method._threshold_optim.X.condition"]], "device (quapy.method._neural.quanetmodule property)": [[5, "quapy.method._neural.QuaNetModule.device"]], "discard() (quapy.method._threshold_optim.ms2 method)": [[5, "quapy.method._threshold_optim.MS2.discard"]], "discard() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.discard"]], "ensemblefactory() (in module quapy.method.meta)": [[5, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.fit"]], "fit() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.binaryaggregativequantifier method)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.fit"]], "fit() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.fit"]], "fit() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.fit"]], "fit() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "fit() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.fit"]], "forward() 
(quapy.method._neural.quanetmodule method)": [[5, "quapy.method._neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[5, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_conditional_probability_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_conditional_probability_samples"]], "get_kde_function() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_kde_function"]], "get_mixture_components() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_mixture_components"]], "get_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.get_params"]], "get_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.get_params"]], "get_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.get_params"]], "get_prevalence_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_prevalence_samples"]], "get_probability_distribution() (in module quapy.method.meta)": [[5, "quapy.method.meta.get_probability_distribution"]], "gram_matrix_mix_sum() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.gram_matrix_mix_sum"]], "mae_loss() (in module quapy.method._neural)": [[5, "quapy.method._neural.mae_loss"]], "neg_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.neg_label"]], "newelm() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newELM"]], "newinvariantratioestimation() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.newInvariantRatioEstimation"]], "newonevsall() (in module quapy.method.base)": [[5, "quapy.method.base.newOneVsAll"]], "newsvmae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMAE"]], "newsvmkld() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMKLD"]], "newsvmq() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMQ"]], "newsvmrae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMRAE"]], "pdf() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.pdf"]], "pos_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.pos_label"]], "predict_proba() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.quantify"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier 
method)": [[5, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.quantify"]], "quantify() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.quantify"]], "quantify() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quantify() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.quantify"]], "quapy.method": [[5, "module-quapy.method"]], "quapy.method._kdey": [[5, "module-quapy.method._kdey"]], "quapy.method._neural": [[5, "module-quapy.method._neural"]], "quapy.method._threshold_optim": [[5, "module-quapy.method._threshold_optim"]], "quapy.method.aggregative": [[5, "module-quapy.method.aggregative"]], "quapy.method.base": [[5, "module-quapy.method.base"]], "quapy.method.meta": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative": [[5, "module-quapy.method.non_aggregative"]], "sample_from_posterior() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.sample_from_posterior"]], "set_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.set_params"]], "set_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.set_params"]], "set_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.set_params"]], "std_constrained_linear_ls() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.std_constrained_linear_ls"]], "training (quapy.method._neural.quanetmodule attribute)": [[5, "quapy.method._neural.QuaNetModule.training"]], "val_split (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split"]], "val_split_ (quapy.method.aggregative.aggregativequantifier attribute)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split_"]]}}) \ No newline at end of file diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..734acf2
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,55 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# For the full list of built-in configuration values, see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+import pathlib
+import sys
+from os.path import join
+quapy_path = join(pathlib.Path(__file__).parents[2].resolve().as_posix(), 'quapy')
+print(f'quapy path={quapy_path}')
+sys.path.insert(0, quapy_path)
+
+
+project = 'QuaPy: A Python-based open-source framework for quantification'
+copyright = '2024, Alejandro Moreo'
+author = 'Alejandro Moreo'
+
+
+
+import quapy
+
+release = quapy.__version__
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    'sphinx.ext.duration',
+    'sphinx.ext.doctest',
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',
+    'sphinx.ext.viewcode',
+    'sphinx.ext.napoleon'
+]
+
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = 'sphinx_rtd_theme'
+# html_theme = 'furo'
+# need to be installed: pip install furo (not working...)
+html_static_path = ['_static']
+
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..cc5b4dc
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,41 @@
+.. QuaPy: A Python-based open-source framework for quantification documentation master file, created by
+   sphinx-quickstart on Wed Feb 7 16:26:46 2024.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to QuaPy's documentation!
+==========================================================================================
+
+QuaPy is a Python-based open-source framework for quantification.
+
+This document contains the API of the modules included in QuaPy.
+
+Installation
+------------
+
+`pip install quapy`
+
+GitHub
+------------
+
+QuaPy is hosted on GitHub at `https://github.com/HLT-ISTI/QuaPy <https://github.com/HLT-ISTI/QuaPy>`_
+
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+Contents
+--------
+
+.. toctree::
+
+   modules
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
new file mode 100644
index 0000000..5d84a54
--- /dev/null
+++ b/docs/source/modules.rst
@@ -0,0 +1,7 @@
+quapy
+=====
+
+.. toctree::
+   :maxdepth: 4
+
+   quapy
diff --git a/docs/source/quapy.classification.rst b/docs/source/quapy.classification.rst
new file mode 100644
index 0000000..cfc7d9b
--- /dev/null
+++ b/docs/source/quapy.classification.rst
@@ -0,0 +1,45 @@
+quapy.classification package
+============================
+
+Submodules
+----------
+
+quapy.classification.calibration module
+---------------------------------------
+
+.. automodule:: quapy.classification.calibration
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.classification.methods module
+-----------------------------------
+
+.. automodule:: quapy.classification.methods
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.classification.neural module
+----------------------------------
+
+.. automodule:: quapy.classification.neural
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.classification.svmperf module
+-----------------------------------
+
+.. automodule:: quapy.classification.svmperf
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: quapy.classification
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/quapy.data.rst b/docs/source/quapy.data.rst
new file mode 100644
index 0000000..cadace6
--- /dev/null
+++ b/docs/source/quapy.data.rst
@@ -0,0 +1,46 @@
+quapy.data package
+==================
+
+Submodules
+----------
+
+quapy.data.base module
+----------------------
+
+.. automodule:: quapy.data.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.data.datasets module
+--------------------------
+
+.. automodule:: quapy.data.datasets
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+quapy.data.preprocessing module
+-------------------------------
+
+.. automodule:: quapy.data.preprocessing
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.data.reader module
+------------------------
+
+.. automodule:: quapy.data.reader
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: quapy.data
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/quapy.method.rst b/docs/source/quapy.method.rst
new file mode 100644
index 0000000..8026e0a
--- /dev/null
+++ b/docs/source/quapy.method.rst
@@ -0,0 +1,61 @@
+quapy.method package
+====================
+
+Submodules
+----------
+
+quapy.method.aggregative module
+-------------------------------
+
+.. automodule:: quapy.method.aggregative
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: quapy.method._kdey
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: quapy.method._neural
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+.. automodule:: quapy.method._threshold_optim
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+
+quapy.method.base module
+------------------------
+
+.. automodule:: quapy.method.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.method.meta module
+------------------------
+
+.. automodule:: quapy.method.meta
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.method.non\_aggregative module
+------------------------------------
+
+.. automodule:: quapy.method.non_aggregative
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: quapy.method
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/quapy.rst b/docs/source/quapy.rst
new file mode 100644
index 0000000..af2708b
--- /dev/null
+++ b/docs/source/quapy.rst
@@ -0,0 +1,80 @@
+quapy package
+=============
+
+Subpackages
+-----------
+
+.. toctree::
+   :maxdepth: 4
+
+   quapy.classification
+   quapy.data
+   quapy.method
+
+
+Submodules
+----------
+
+quapy.error module
+------------------
+
+.. automodule:: quapy.error
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.evaluation module
+-----------------------
+
+.. automodule:: quapy.evaluation
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.functional module
+-----------------------
+
+.. automodule:: quapy.functional
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.model\_selection module
+-----------------------------
+
+.. automodule:: quapy.model_selection
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.plot module
+-----------------
+
+.. automodule:: quapy.plot
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.protocol module
+---------------------
+
+.. automodule:: quapy.protocol
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+quapy.util module
+-----------------
+
+.. automodule:: quapy.util
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Module contents
+---------------
+
+.. automodule:: quapy
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/examples/bayesian_quantification.py b/examples/bayesian_quantification.py
index 06c928c..2d0f4ed 100644
--- a/examples/bayesian_quantification.py
+++ b/examples/bayesian_quantification.py
@@ -1,4 +1,6 @@
 """
+.. author:: Paweł Czyż
+
 This example shows how to use Bayesian quantification (https://arxiv.org/abs/2302.09159),
 which is suitable for low-data situations and when the uncertainty of the prevalence estimate is of interest.
 
diff --git a/quapy/functional.py b/quapy/functional.py
index 84acdbc..677715b 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -2,24 +2,25 @@ import itertools
 import warnings
 from collections import defaultdict
 from typing import Literal, Union, Callable
+from numpy.typing import ArrayLike
 
 import scipy
 import numpy as np
 
 
-def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01):
+def prevalence_linspace(grid_points:int=21, repeats:int=1, smooth_limits_epsilon:float=0.01):
     """
     Produces an array of uniformly separated values of prevalence.
     By default, produces an array of 21 prevalence values, with
     step 0.05 and with the limits smoothed, i.e.:
     [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]
 
-    :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)
+    :param grid_points: the number of prevalence values to sample from the [0,1] interval (default 21)
     :param repeats: number of times each prevalence is to be repeated (defaults to 1)
     :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1
     :return: an array of uniformly separated prevalence values
     """
-    p = np.linspace(0., 1., num=n_prevalences, endpoint=True)
+    p = np.linspace(0., 1., num=grid_points, endpoint=True)
     p[0] += smooth_limits_epsilon
     p[-1] -= smooth_limits_epsilon
     if p[0] > p[1]:
@@ -29,7 +30,7 @@ def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01)
     return p
 
 
-def counts_from_labels(labels, classes):
+def counts_from_labels(labels: ArrayLike, classes: ArrayLike):
     """
     Computes the count values from a vector of labels.
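For reviewers skimming the hunks above, a minimal sketch (not part of the patch) of the renamed `prevalence_linspace` signature in use; it assumes `quapy` with this patch applied is importable:

```
import quapy.functional as F

# five uniformly spaced prevalence values, with the extremes
# smoothed away from the degenerate limits 0 and 1
print(F.prevalence_linspace(grid_points=5, repeats=1, smooth_limits_epsilon=0.01))
# -> [0.01 0.25 0.5  0.75 0.99]
```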
@@ -38,7 +39,7 @@ def counts_from_labels(labels, classes):
         some classes have no examples.
     :return: an ndarray of shape `(len(classes),)` with the occurrence counts of each class
     """
-    if labels.ndim != 1:
+    if np.asarray(labels).ndim != 1:
         raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
     unique, counts = np.unique(labels, return_counts=True)
     by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
@@ -46,7 +47,7 @@ def counts_from_labels(labels, classes):
     return counts
 
 
-def prevalence_from_labels(labels, classes):
+def prevalence_from_labels(labels: ArrayLike, classes: ArrayLike):
     """
     Computes the prevalence values from a vector of labels.
 
@@ -59,7 +60,7 @@
     return counts / np.sum(counts)
 
 
-def prevalence_from_probabilities(posteriors, binarize: bool = False):
+def prevalence_from_probabilities(posteriors: ArrayLike, binarize: bool = False):
     """
     Returns a vector of prevalence values from a matrix of posterior probabilities.
 
@@ -68,6 +69,7 @@
         converting the vectors of posterior probabilities into class indices, by taking the argmax).
     :return: array of shape `(n_classes,)` containing the prevalence values
     """
+    posteriors = np.asarray(posteriors)
     if posteriors.ndim != 2:
         raise ValueError(f'param posteriors does not seem to be a ndarray of posterior probabilities')
     if binarize:
@@ -79,7 +81,7 @@
     return prevalences
 
 
-def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False):
+def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary: bool=False):
     """
     Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two
     values representing a binary distribution.
 
@@ -97,7 +99,7 @@ def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_
 
 
 
-def HellingerDistance(P, Q) -> float:
+def HellingerDistance(P: np.ndarray, Q: np.ndarray) -> float:
     """
     Computes the Hellinger Distance (HD) between (discretized) distributions `P` and `Q`.
     The HD for two discrete distributions of `k` bins is defined as:
 
@@ -112,7 +114,7 @@
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
 
 
-def TopsoeDistance(P, Q, epsilon=1e-20):
+def TopsoeDistance(P: np.ndarray, Q: np.ndarray, epsilon: float=1e-20):
     """
     Topsoe distance between two (discretized) distributions `P` and `Q`.
     The Topsoe distance for two discrete distributions of `k` bins is defined as:
 
@@ -128,7 +130,7 @@
     return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
 
 
-def uniform_prevalence_sampling(n_classes, size=1):
+def uniform_prevalence_sampling(n_classes: int, size: int=1):
     """
     Implements the `Kraemer algorithm `_
     for sampling uniformly at random from the unit simplex. This implementation is adapted from this
 
@@ -157,7 +159,7 @@
 uniform_simplex_sampling = uniform_prevalence_sampling
 
 
-def strprev(prevalences, prec=3):
+def strprev(prevalences: ArrayLike, prec: int=3):
     """
     Returns a string representation for a prevalence vector.
     E.g.,
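The two divergences annotated above can be hand-checked on a toy pair of discretized distributions (a hedged sketch, not part of the patch; the expected value is rounded):

```
import numpy as np
import quapy.functional as F

P = np.asarray([0.5, 0.5])
Q = np.asarray([0.9, 0.1])

# HD(P, Q) = sqrt(sum_i (sqrt(p_i) - sqrt(q_i))^2) ~= 0.4595 for these vectors
print(F.HellingerDistance(P, Q))
print(F.TopsoeDistance(P, Q))
```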
@@ -171,7 +173,7 @@ def strprev(prevalences, prec=3):
     return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'
 
 
-def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
+def adjusted_quantification(prevalence_estim: ArrayLike, tpr: float, fpr: float, clip: bool=True):
     """
     Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the
     positive class `p` comes down to computing:
 
@@ -195,7 +197,7 @@ def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
     return adjusted
 
 
-def normalize_prevalence(prevalences):
+def normalize_prevalence(prevalences: ArrayLike):
     """
     Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in
     cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in
 
@@ -287,23 +289,23 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe
         n_prevpoints += 1
 
 
-def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08):
+def check_prevalence_vector(prevalences: ArrayLike, raise_exception: bool=False, toleranze: float=1e-08):
     """
     Checks that `prevalences` is a valid prevalence vector, i.e., that it contains values in [0,1] and that the
     values sum up to 1.
 
-    :param p: the prevalence vector to check
+    :param prevalences: the prevalence vector to check
     :return: True if `prevalences` is valid, False otherwise
     """
-    p = np.asarray(p)
-    if not all(p>=0):
+    prevalences = np.asarray(prevalences)
+    if not all(prevalences >= 0):
         if raise_exception:
             raise ValueError('the prevalence vector contains negative numbers')
         return False
-    if not all(p<=1):
+    if not all(prevalences <= 1):
         if raise_exception:
             raise ValueError('the prevalence vector contains values >1')
         return False
-    if not np.isclose(p.sum(), 1, atol=toleranze):
+    if not np.isclose(prevalences.sum(), 1, atol=toleranze):
         if raise_exception:
             raise ValueError('the prevalence vector does not sum up to 1')
         return False
@@ -311,6 +313,14 @@
 
 
 def get_divergence(divergence: Union[str, Callable]):
+    """
+    Guarantees that the divergence received as argument is a function. That is, if this argument is already
+    a callable, then it is returned; if it is instead a string, the corresponding divergence function is
+    instantiated from its name.
+
+    :param divergence: callable or string indicating the name of the divergence function
+    :return: callable
+    """
     if isinstance(divergence, str):
         if divergence=='HD':
             return HellingerDistance
@@ -324,7 +334,20 @@ def get_divergence(divergence: Union[str, Callable]):
         raise ValueError(f'argument "divergence" not understood; use a str or a callable function')
 
 
-def argmin_prevalence(loss, n_classes, method='optim_minimize'):
+def argmin_prevalence(loss: Callable,
+                      n_classes: int,
+                      method: Literal["optim_minimize", "linear_search", "ternary_search"]='optim_minimize'):
+    """
+    Searches for the prevalence vector that minimizes a loss function.
+
+    :param loss: callable, the function to minimize
+    :param n_classes: int, number of classes
+    :param method: string indicating the search strategy. Possible values are::
+        'optim_minimize': uses scipy.optimize
+        'linear_search': carries out a linear search for binary problems in the space [0, 0.01, 0.02, ..., 1]
+        'ternary_search': reserved for a ternary search (not yet implemented)
+    :return: np.ndarray, a prevalence vector
+    """
     if method == 'optim_minimize':
         return optim_minimize(loss, n_classes)
     elif method == 'linear_search':
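To illustrate the documented search strategies, a minimal sketch (not part of the patch) that recovers a known target prevalence by minimizing the mean absolute error; the target values are arbitrary:

```
import numpy as np
import quapy.functional as F

target = np.asarray([0.2, 0.5, 0.3])
loss = lambda prev: np.abs(prev - target).mean()

# constrained search over the probability simplex via scipy.optimize
print(F.argmin_prevalence(loss, n_classes=3, method='optim_minimize'))
# should approach [0.2 0.5 0.3]
```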
@@ -335,7 +358,7 @@
     raise NotImplementedError()
 
 
-def optim_minimize(loss, n_classes):
+def optim_minimize(loss: Callable, n_classes: int):
     """
     Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex
     that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's
@@ -357,7 +380,7 @@
     return r.x
 
 
-def linear_search(loss, n_classes):
+def linear_search(loss: Callable, n_classes: int):
     """
     Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring
     the range [0,1] stepping by 0.01. This search is inefficient, and is added only for completeness (some of the
@@ -370,7 +393,7 @@
     assert n_classes==2, 'linear search is only available for binary problems'
 
     prev_selected, min_score = None, None
-    for prev in prevalence_linspace(n_prevalences=100, repeats=1, smooth_limits_epsilon=0.0):
+    for prev in prevalence_linspace(grid_points=100, repeats=1, smooth_limits_epsilon=0.0):
         score = loss(np.asarray([1 - prev, prev]))
         if min_score is None or score < min_score:
             prev_selected, min_score = prev, score
@@ -378,7 +401,7 @@ def linear_search(loss, n_classes):
     return np.asarray([1 - prev_selected, prev_selected])
 
 
-def _project_onto_probability_simplex(v: np.ndarray) -> np.ndarray:
+def map_onto_probability_simplex(unnormalized_arr: ArrayLike) -> np.ndarray:
     """Projects a point onto the probability simplex.
 
     The code is adapted from Mathieu Blondel's BSD-licensed
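The projection being renamed in the next hunk can be hand-checked on a small vector (a hedged sketch, not part of the patch; the numbers follow the sort/cumsum algorithm shown below):

```
import numpy as np
import quapy.functional as F

# [0.3, 0.9] sums to 1.2; the Euclidean projection subtracts theta=0.1
# from each coordinate, landing on the probability simplex
print(F.map_onto_probability_simplex(np.asarray([0.3, 0.9])))
# -> [0.2 0.8]
```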
@@ -389,85 +412,88 @@
     Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex,
     ICPR 2014, `URL `_
 
-    :param v: point in n-dimensional space, shape `(n,)`
+    :param unnormalized_arr: point in n-dimensional space, shape `(n,)`
     :return: projection of `unnormalized_arr` onto (n-1)-dimensional probability simplex, shape `(n,)`
     """
-    v = np.asarray(v)
-    n = len(v)
+    unnormalized_arr = np.asarray(unnormalized_arr)
+    n = len(unnormalized_arr)
     # Sort the values in the descending order
-    u = np.sort(v)[::-1]
+    u = np.sort(unnormalized_arr)[::-1]
     cssv = np.cumsum(u) - 1.0
     ind = np.arange(1, n + 1)
     cond = u - cssv / ind > 0
     rho = ind[cond][-1]
     theta = cssv[cond][-1] / float(rho)
-    return np.maximum(v - theta, 0)
+    return np.maximum(unnormalized_arr - theta, 0)
 
-
-def clip_prevalence(p: np.ndarray, method: Literal[None, "none", "clip", "project"]) -> np.ndarray:
+def clip_prevalence(prevalences: ArrayLike, method: Literal[None, "none", "clip", "project"]) -> np.ndarray:
     """
-    Clips the proportions vector `p` so that it is a valid probability distribution.
+    Clips the proportions vector `prevalences` so that it is a valid probability distribution, i.e., all values
+    are in [0,1] and sum up to 1.
 
-    :param p: the proportions vector to be clipped, shape `(n_classes,)`
-    :param method: the method to use for normalization.
+    :param prevalences: array-like, the proportions vector to be clipped, shape `(n_classes,)`
+    :param method: indicates the method to be used for normalization.
        If `None` or `"none"`, no normalization is performed.
        If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
        If `"project"`, the values are projected onto the probability simplex.
     :return: the normalized prevalence vector, shape `(n_classes,)`
     """
-    if method is None or method == "none":
-        return p
+    prevalences = np.asarray(prevalences)
+    if method in [None, "none"]:
+        return prevalences
     elif method == "clip":
-        adjusted = np.clip(p, 0, 1)
-        return adjusted / adjusted.sum()
+        clipped = np.clip(prevalences, 0, 1)
+        adjusted = clipped / clipped.sum()
+        return adjusted
     elif method == "project":
-        return _project_onto_probability_simplex(p)
+        return map_onto_probability_simplex(prevalences)
     else:
-        raise ValueError(f"Method {method} not known.")
+        raise ValueError(f'Unknown method {method}. Valid ones are "none", "clip", or "project"')
 
 
 def solve_adjustment(
-    p_c_y: np.ndarray,
+    p_c_cond_y: np.ndarray,
     p_c: np.ndarray,
     method: Literal["inversion", "invariant-ratio"],
-    solver: Literal["exact", "minimize", "exact-raise", "exact-cc"],
-) -> np.ndarray:
+    solver: Literal["exact", "minimize", "exact-raise", "exact-cc"]) -> np.ndarray:
     """
-    Function finding the prevalence vector by adjusting
-    the classifier predictions.
+    Function that tries to solve the equation :math:`P(C)=P(C|Y)P(Y)`, where :math:`P(C)` is the vector of
+    prevalence values obtained via classify and count, and :math:`P(C|Y)` are the class-conditional misclassification
+    rates of the classifier.
 
-    :param p_c_y: array of shape `(n_classes, n_classes,)` with entry `(c,y)` being the estimate
-        of :math:`P(C=c|Y=y)`, that is, the probability that an instance that belongs to class :math:`y`
-        ends up being classified as belonging to class :math:`c`
-    :param p_c: classifier predictions, where the entry `c` is the estimate of :math:`P(C=c)`. Shape `(n_classes,)`
-    :param method: adjustment method to be used:
-        'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`,
-        which tries to invert `P(C|Y)` matrix.
-        'invariant-ratio': invariant ratio estimator of `Vaz et al. `_,
-        which replaces the last equation with the normalization condition.
-    :param solver: the method to use for solving the system of linear equations. Valid options are:
-        'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has
-        rank strictly less than `n_classes`.
-        'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds
-        to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`)
-        'exact': deprecated, defaults to 'exact-cc'
-        'minimize': minimizes a loss, so the solution always exists
+    :param p_c_cond_y: array of shape `(n_classes, n_classes,)` with entry `(c,y)` being the estimate
+        of :math:`P(C=c|Y=y)`, that is, the probability that an instance that belongs to class :math:`y`
+        ends up being classified as belonging to class :math:`c`
+
+    :param p_c: array of shape `(n_classes,)` containing the prevalence values as estimated by classify and count
+
+    :param str method: indicates the adjustment method to be used. Valid options are:
+
+        * 'inversion': tries to solve the equation :math:`P(C)=P(C|Y)P(Y)` as :math:`P(Y) = P(C|Y)^{-1} P(C)` where :math:`P(C|Y)^{-1}` is the matrix inversion of :math:`P(C|Y)`. This inversion may not exist in degenerate cases.
+        * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, which replaces the last equation with the normalization condition.
+
+    :param str solver: the method to use for solving the system of linear equations. Valid options are:
+
+        * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`.
+        * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`)
+        * 'exact': deprecated, defaults to 'exact-cc'
+        * 'minimize': minimizes a loss, so the solution always exists
     """
     if solver == "exact":
-        warnings.warn("The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2)
+        warnings.warn(
+            "The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2)
         solver = "exact-cc"
 
-    A = np.array(p_c_y, dtype=float)
-    B = np.array(p_c, dtype=float)
+    A = np.asarray(p_c_cond_y, dtype=float)
+    B = np.asarray(p_c, dtype=float)
 
     if method == "inversion":
         pass  # We leave A and B unchanged
     elif method == "invariant-ratio":
-        # Change the last equation to replace
-        # it with the normalization condition
+        # Change the last equation to replace it with the normalization condition
        A[-1, :] = 1.0
        B[-1] = 1.0
    else:
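A minimal sketch (not part of the patch) of the renamed `solve_adjustment` on a 2-class toy problem; the misclassification rates are made up, and the uncorrected estimate is constructed so that the true prevalence [0.5, 0.5] is recovered exactly:

```
import numpy as np
import quapy.functional as F

# P(C=c|Y=y): columns are true classes, rows are predicted classes
p_c_cond_y = np.asarray([[0.7, 0.2],
                         [0.3, 0.8]])
# classify-and-count estimate: p_c_cond_y @ [0.5, 0.5]
p_c = np.asarray([0.45, 0.55])

print(F.solve_adjustment(p_c_cond_y=p_c_cond_y, p_c=p_c,
                         method="inversion", solver="exact-raise"))
# -> [0.5 0.5]
```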
This inversion may not exist in degenerate cases
+ * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, which replaces the last equation with the normalization condition.
+
+ :param str solver: the method to use for solving the system of linear equations. Valid options are:
+
+ * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`.
+ * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method; see :class:`quapy.method.aggregative.CC`)
+ * 'exact': deprecated, defaults to 'exact-cc'
+ * 'minimize': minimizes a loss, so the solution always exists
 """
 if solver == "exact":
- warnings.warn("The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2)
+ warnings.warn(
+ "The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2)
 solver = "exact-cc"

- A = np.array(p_c_y, dtype=float)
- B = np.array(p_c, dtype=float)
+ A = np.asarray(p_c_cond_y, dtype=float)
+ B = np.asarray(p_c, dtype=float)

 if method == "inversion":
 pass # We leave A and B unchanged
 elif method == "invariant-ratio":
- # Change the last equation to replace
- # it with the normalization condition
+ # Change the last equation to replace it with the normalization condition
 A[-1, :] = 1.0
 B[-1] = 1.0
 else:
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 3b44491..bba0c15 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -352,103 +352,6 @@ class CC(AggregativeCrispQuantifier):
 return F.prevalence_from_labels(classif_predictions, self.classes_)


-class ACC(AggregativeCrispQuantifier):
- """
- `Adjusted Classify & Count `_,
- the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
- according to the `misclassification rates`.
-
- :param classifier: a sklearn's Estimator that generates a classifier
- :param val_split: specifies the data used for generating classifier predictions. This specification
- can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
- be extracted from the training set; or as an integer (default 5), indicating that the predictions
- are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
- for `k`); or as a collection defining the specific set of data to use for validation.
- Alternatively, this set can be specified at fit time by indicating the exact set of data
- on which the predictions are to be generated.
- :param n_jobs: number of parallel workers
- :param method: adjustment method to be used:
- 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`,
- which tries to invert `P(C|Y)` matrix.
- 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_,
- which replaces the last equation with the normalization condition.
- :param solver: the method to use for solving the system of linear equations. Valid options are:
- 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has
- rank strictly less than `n_classes`.
- 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds
- to no adjustment (i.e., the classify and count method.
 See :class:`quapy.method.aggregative.CC`)
- 'exact': deprecated, defaults to 'exact-cc'
- 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter.
- More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and
- Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications
- (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_.
- :param clipping: the method to use for normalization.
- If `None` or `"none"`, no normalization is performed.
- If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
- If `"project"`, the values are projected onto the probability simplex.
- """
-
- def __init__(
- self,
- classifier: BaseEstimator,
- val_split=5,
- n_jobs=None,
- solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize',
- method: Literal['inversion', 'invariant-ratio'] = 'inversion',
- clipping: Literal['clip', 'none', 'project'] = 'clip',
- ) -> None:
- self.classifier = classifier
- self.val_split = val_split
- self.n_jobs = qp._get_njobs(n_jobs)
- self.solver = solver
- self.method = method
- self.clipping = clipping
-
- def _check_init_parameters(self):
- if self.solver not in ['exact', 'minimize', 'exact-raise', 'exact-cc']:
- raise ValueError("unknown solver; valid ones are 'exact', 'minimize', 'exact-raise', 'exact-cc'")
- if self.method not in ['inversion', 'invariant-ratio']:
- raise ValueError("unknown method; valid ones are 'inversion', 'invariant-ratio'")
- if self.clipping not in ['clip', 'none', 'project', None]:
- raise ValueError("unknown clipping; valid ones are 'clip', 'none', 'project' or None")
-
- def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
- """
- Estimates the misclassification rates.
-
- :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
- as instances, the label predictions issued by the classifier and, as labels, the true labels
- :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
- """
- pred_labels, true_labels = classif_predictions.Xy
- self.cc = CC(self.classifier)
- self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels)
-
- @classmethod
- def getPteCondEstim(cls, classes, y, y_):
- # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a
- # document that belongs to yj ends up being classified as belonging to yi
- conf = confusion_matrix(y, y_, labels=classes).T
- conf = conf.astype(float)
- class_counts = conf.sum(axis=0)
- for i, _ in enumerate(classes):
- if class_counts[i] == 0:
- conf[i, i] = 1
- else:
- conf[:, i] /= class_counts[i]
- return conf
-
- def aggregate(self, classif_predictions):
- prevs_estim = self.cc.aggregate(classif_predictions)
- estimate = F.solve_adjustment(
- p_c_y=self.Pte_cond_estim_,
- p_c=prevs_estim,
- solver=self.solver,
- method=self.method,
- )
- return F.clip_prevalence(estimate, method=self.clipping)
-
-
 class PCC(AggregativeSoftQuantifier):
 """
 `Probabilistic Classify & Count `_,
@@ -473,49 +376,178 @@ class PCC(AggregativeSoftQuantifier):
 return F.prevalence_from_probabilities(classif_posteriors, binarize=False)


+class ACC(AggregativeCrispQuantifier):
+ """
+ `Adjusted Classify & Count `_,
+ the "adjusted" variant of :class:`CC`, that corrects the predictions of CC
+ according to the `misclassification rates`.
+
+ :param classifier: a sklearn's Estimator that generates a classifier
+
+ :param val_split: specifies the data used for generating classifier predictions. This specification
+ can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+ be extracted from the training set; or as an integer (default 5), indicating that the predictions
+ are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+ for `k`); or as a collection defining the specific set of data to use for validation.
+ Alternatively, this set can be specified at fit time by indicating the exact set of data
+ on which the predictions are to be generated.
+
+ :param str method: adjustment method to be used:
+
+ * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, which tries to invert the :math:`P(C|Y)` matrix.
+ * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, which replaces the last equation with the normalization condition.
+
+ :param str solver: indicates the method to use for solving the system of linear equations. Valid options are:
+
+ * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`.
+ * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method; see :class:`quapy.method.aggregative.CC`)
+ * 'exact': deprecated, defaults to 'exact-cc'
+ * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better and is the default. More details can be found in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and Count", in the proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_.
+
+ :param str clipping: the method to use for normalization.
+
+ * If `None` or `"none"`, no normalization is performed.
+ * If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
+ * If `"project"`, the values are projected onto the probability simplex.
+
+ :param n_jobs: number of parallel workers
+ """
+ def __init__(
+ self,
+ classifier: BaseEstimator,
+ val_split=5,
+ solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize',
+ method: Literal['inversion', 'invariant-ratio'] = 'inversion',
+ clipping: Literal['clip', 'none', 'project'] = 'clip',
+ n_jobs=None,
+ ):
+ self.classifier = classifier
+ self.val_split = val_split
+ self.n_jobs = qp._get_njobs(n_jobs)
+ self.solver = solver
+ self.method = method
+ self.clipping = clipping
+
+ SOLVERS = ['exact', 'minimize', 'exact-raise', 'exact-cc']
+ METHODS = ['inversion', 'invariant-ratio']
+ CLIPPING = ['clip', 'none', 'project', None]
+
+
+ @classmethod
+ def newInvariantRatioEstimation(cls, classifier: BaseEstimator, val_split=5, n_jobs=None):
+ """
+ Constructs a quantifier that implements the Invariant Ratio Estimator of
+ `Vaz et al. 2018 `_. This amounts
+ to setting method to 'invariant-ratio' and clipping to 'project'.
+
+ :param classifier: a sklearn's Estimator that generates a classifier
+ :param val_split: specifies the data used for generating classifier predictions.
This specification
+ can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
+ be extracted from the training set; or as an integer (default 5), indicating that the predictions
+ are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
+ for `k`); or as a collection defining the specific set of data to use for validation.
+ Alternatively, this set can be specified at fit time by indicating the exact set of data
+ on which the predictions are to be generated.
+ :param n_jobs: number of parallel workers
+ :return: an instance of ACC configured so that it implements the Invariant Ratio Estimator
+ """
+ return ACC(classifier, val_split=val_split, method='invariant-ratio', clipping='project', n_jobs=n_jobs)
+
+ def _check_init_parameters(self):
+ if self.solver not in ACC.SOLVERS:
+ raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
+ if self.method not in ACC.METHODS:
+ raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
+ if self.clipping not in ACC.CLIPPING:
+ raise ValueError(f"unknown clipping; valid ones are {ACC.CLIPPING}")
+
+ def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
+ """
+ Estimates the misclassification rates.
+
+ :param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
+ as instances, the label predictions issued by the classifier and, as labels, the true labels
+ :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
+ """
+ pred_labels, true_labels = classif_predictions.Xy
+ self.cc = CC(self.classifier)
+ self.Pte_cond_estim_ = ACC.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels)
+
+ @classmethod
+ def getPteCondEstim(cls, classes, y, y_):
+ """
+ Estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a
+ document that belongs to yj ends up being classified as belonging to yi
+
+ :param classes: array-like with the class names
+ :param y: array-like with the true labels
+ :param y_: array-like with the estimated labels
+ :return: np.ndarray
+ """
+ conf = confusion_matrix(y, y_, labels=classes).T
+ conf = conf.astype(float)
+ class_counts = conf.sum(axis=0)
+ for i, _ in enumerate(classes):
+ if class_counts[i] == 0:
+ conf[i, i] = 1
+ else:
+ conf[:, i] /= class_counts[i]
+ return conf
+
+ def aggregate(self, classif_predictions):
+ prevs_estim = self.cc.aggregate(classif_predictions)
+ estimate = F.solve_adjustment(
+ p_c_cond_y=self.Pte_cond_estim_,
+ p_c=prevs_estim,
+ solver=self.solver,
+ method=self.method,
+ )
+ return F.clip_prevalence(estimate, method=self.clipping)
+
+
 class PACC(AggregativeSoftQuantifier):
 """
 `Probabilistic Adjusted Classify & Count `_,
 the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.

 :param classifier: a sklearn's Estimator that generates a classifier
+
 :param val_split: specifies the data used for generating classifier predictions. This specification
 can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to
 be extracted from the training set; or as an integer (default 5), indicating that the predictions
 are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value
 for `k`). Alternatively, this set can be specified at fit time by indicating the exact set of data
 on which the predictions are to be generated.
- :param n_jobs: number of parallel workers
- :param method: adjustment method to be used:
- 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`,
- which tries to invert `P(C|Y)` matrix.
- 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_,
- which replaces the last equation with the normalization condition.
- :param solver: the method to use for solving the system of linear equations. Valid options are:
- 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has
- rank strictly less than `n_classes`.
- 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds
- to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`)
- 'exact': deprecated, defaults to 'exact-cc'
- 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter.
- More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and
- Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications
- (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_.
- :param clipping: the method to use for normalization.
- If `None` or `"none"`, no normalization is performed.
- If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
- If `"project"`, the values are projected onto the probability simplex.
- """
+ :param str method: adjustment method to be used:
+
+ * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, which tries to invert the `P(C|Y)` matrix.
+ * 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, which replaces the last equation with the normalization condition.
+
+ :param str solver: the method to use for solving the system of linear equations. Valid options are:
+
+ * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`.
+ * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method; see :class:`quapy.method.aggregative.CC`)
+ * 'exact': deprecated, defaults to 'exact-cc'
+ * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better and is the default. More details can be found in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and Count", in the proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_.
+
+ :param str clipping: the method to use for normalization.
+
+ * If `None` or `"none"`, no normalization is performed.
+ * If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
+ * If `"project"`, the values are projected onto the probability simplex.
+
+ :param n_jobs: number of parallel workers
+ """
 def __init__(
 self,
 classifier: BaseEstimator,
 val_split=5,
- n_jobs=None,
 solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize',
 method: Literal['inversion', 'invariant-ratio'] = 'inversion',
 clipping: Literal['clip', 'none', 'project'] = 'clip',
- ) -> None:
+ n_jobs=None,
+ ):
 self.classifier = classifier
 self.val_split = val_split
 self.n_jobs = qp._get_njobs(n_jobs)
@@ -525,12 +557,12 @@ class PACC(AggregativeSoftQuantifier):
 self.clipping = clipping

 def _check_init_parameters(self):
- if self.solver not in ['exact', 'minimize', 'exact-raise', 'exact-cc']:
- raise ValueError("unknown solver; valid ones are 'exact', 'minimize', 'exact-raise', 'exact-cc'")
- if self.method not in ['inversion', 'invariant-ratio']:
- raise ValueError("unknown method; valid ones are 'inversion', 'invariant-ratio'")
- if self.clipping not in ['clip', 'none', 'project', None]:
- raise ValueError("unknown clipping; valid ones are 'clip', 'none', 'project' or None")
+ if self.solver not in ACC.SOLVERS:
+ raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
+ if self.method not in ACC.METHODS:
+ raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
+ if self.clipping not in ACC.CLIPPING:
+ raise ValueError(f"unknown clipping; valid ones are {ACC.CLIPPING}")

 def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
 """
@@ -542,20 +574,19 @@ class PACC(AggregativeSoftQuantifier):
 """
 posteriors, true_labels = classif_predictions.Xy
 self.pcc = PCC(self.classifier)
- self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, true_labels, posteriors)
+ self.Pte_cond_estim_ = PACC.getPteCondEstim(self.classifier.classes_, true_labels, posteriors)

 def aggregate(self, classif_posteriors):
 prevs_estim = self.pcc.aggregate(classif_posteriors)
 estimate = F.solve_adjustment(
- p_c_y=self.Pte_cond_estim_,
+ p_c_cond_y=self.Pte_cond_estim_,
 p_c=prevs_estim,
 solver=self.solver,
 method=self.method,
 )
 return F.clip_prevalence(estimate, method=self.clipping)

-
 @classmethod
 def getPteCondEstim(cls, classes, y, y_):
 # estimate the matrix with entry (i,j) being the estimate of P(hat_yi|yj), that is, the probability that a
@@ -906,7 +937,7 @@ class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
 # at small steps (modern implementations resort to an optimization procedure,
 # see class DistributionMatching)
 prev_selected, min_dist = None, None
- for prev in F.prevalence_linspace(n_prevalences=101, repeats=1, smooth_limits_epsilon=0.0):
+ for prev in F.prevalence_linspace(grid_points=101, repeats=1, smooth_limits_epsilon=0.0):
 Px_train = prev * Pxy1_density + (1 - prev) * Pxy0_density
 hdy = F.HellingerDistance(Px_train, Px_test)
 if prev_selected is None or hdy < min_dist:
From aa894a3472e2ce0675d0991d7801aec721d3c021 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Tue, 19 Mar 2024 15:01:42 +0100
Subject: [PATCH 8/8] merging PR; I have taken this opportunity to refactor
 some issues I didn't like, including the normalization of prevalence vectors,
 and improving the documentation here and there

---
 TODO.txt | 97 -----
 docs/build/html/genindex.html | 30 +-
 docs/build/html/index.html | 12 +-
 docs/build/html/modules.html | 10 +-
 docs/build/html/objects.inv | Bin 3505 -> 3532 bytes
 docs/build/html/quapy.html | 293 +++++++++-----
 docs/build/html/quapy.method.html | 66 +--
 docs/build/html/searchindex.js | 2 +-
 quapy/functional.py | 643 +++++++++++++++++-------------
quapy/method/aggregative.py | 96 +++-- quapy/model_selection.py | 1 + 11 files changed, 682 insertions(+), 568 deletions(-) diff --git a/TODO.txt b/TODO.txt index 6547a5b..e69de29 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,97 +0,0 @@ -check sphinks doc for enumerations (for example, the doc for ACC) - -ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now - works with protocols it would need to know the validation set in order to pass something like - "protocol: APP(val, etc.)" -sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified -clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.) -make truly parallel the GridSearchQ -make more examples in the "examples" directory -merge with master, because I had to fix some problems with QuaNet due to an issue notified via GitHub! -added cross_val_predict in qp.model_selection (i.e., a cross_val_predict for quantification) --would be nice to have - it parallelized - -check the OneVsAll module(s) - -check the set_params de neural.py, because the separation of estimator__ is not implemented; see also - __check_params_colision - -HDy can be customized so that the number of bins is specified, instead of explored within the fit method - -Packaging: -========================================== -Document methods with paper references -unit-tests -clean wiki_examples! - -Refactor: -========================================== -Unify ThresholdOptimization methods, as an extension of PACC (and not ACC), the fit methods are almost identical and - use a prob classifier (take into account that PACC uses pcc internally, whereas the threshold methods use cc - instead). The fit method of ACC and PACC has a block for estimating the validation estimates that should be unified - as well... -Refactor protocols. APP and NPP related functionalities are duplicated in functional, LabelledCollection, and evaluation - - -New features: -========================================== -Add "measures for evaluating ordinal"? -Add datasets for topic. -Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top? - -Current issues: -========================================== -Revise the class structure of quantification methods and the methods they inherit... There is some confusion regarding - methods isbinary, isprobabilistic, and the like. The attribute "learner_" in aggregative quantifiers is also - confusing, since there is a getter and a setter. -Remove the "deep" in get_params. There is no real compatibility with scikit-learn as for now. -SVMperf-based learners do not remove temp files in __del__? -In binary quantification (hp, kindle, imdb) we used F1 in the minority class (which in kindle and hp happens to be the -negative class). This is not covered in this new implementation, in which the binary case is not treated as such, but as -an instance of single-label with 2 labels. Check -Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps) -OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers -Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed -Add random seed management to support replicability (see temp_seed in util.py). -GridSearchQ is not trully parallelized. 
It only parallelizes on the predictions. -In the context of a quantifier (e.g., QuaNet or CC), the parameters of the learner should be prefixed with "estimator__", - in QuaNet this is resolved with a __check_params_colision, but this should be improved. It might be cumbersome to - impose the "estimator__" prefix for, e.g., quantifiers like CC though... This should be changed everywhere... -QuaNet needs refactoring. The base quantifiers ACC and PACC receive val_data with instances already transformed. This - issue is due to a bad design. - -Improvements: -========================================== -Explore the hyperparameter "number of bins" in HDy -Rename EMQ to SLD ? -Parallelize the kFCV in ACC and PACC? -Parallelize model selection trainings -We might want to think of (improving and) adding the class Tabular (it is defined and used on branch tweetsent). A more - recent version is in the project ql4facct. This class is meant to generate latex tables from results (highligting - best results, computing statistical tests, colouring cells, producing rankings, producing averages, etc.). Trying - to generate tables is typically a bad idea, but in this specific case we do have pretty good control of what an - experiment looks like. (Do we want to abstract experimental results? this could be useful not only for tables but - also for plots). -Add proper logging system. Currently we use print -It might be good to simplify the number of methods that have to be implemented for any new Quantifier. At the moment, - there are many functions like get_params, set_params, and, specially, @property classes_, which are cumbersome to - implement for quick experiments. A possible solution is to impose get_params and set_params only in cases in which - the model extends some "ModelSelectable" interface only. The classes_ should have a default implementation. - -Checks: -========================================== -How many times is the system of equations for ACC and PACC not solved? How many times is it clipped? Do they sum up - to one always? -Re-check how hyperparameters from the quantifier and hyperparameters from the classifier (in aggregative quantifiers) - is handled. In scikit-learn the hyperparameters from a wrapper method are indicated directly whereas the hyperparams - from the internal learner are prefixed with "estimator__". In QuaPy, combinations having to do with the classifier - can be computed at the begining, and then in an internal loop the hyperparams of the quantifier can be explored, - passing fit_learner=False. -Re-check Ensembles. As for now, they are strongly tied to aggregative quantifiers. -Re-think the environment variables. Maybe add new ones (like, for example, parameters for the plots) -Do we want to wrap prevalences (currently simple np.ndarray) as a class? This might be convenient for some interfaces - (e.g., for specifying artificial prevalences in samplings, for printing them -- currently supported through - F.strprev(), etc.). This might however add some overload, and prevent/difficult post processing with numpy. -Would be nice to get a better integration with sklearn. - - diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index 9b2dce2..0099e44 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -116,8 +116,6 @@
diff --git a/docs/build/html/index.html b/docs/build/html/index.html
index 7d09502..e3156f3 100644
--- a/docs/build/html/index.html
+++ b/docs/build/html/index.html
diff --git a/docs/build/html/modules.html b/docs/build/html/modules.html
index 96bd5b3..be528e0 100644
--- a/docs/build/html/modules.html
+++ b/docs/build/html/modules.html
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index c545ad3f0d4b554cf7e42ca2a2c3efe7aff55be4..d0f12857371b263f645bab3c3122be2b189be5bd 100644
GIT binary patch
diff --git a/docs/build/html/quapy.html b/docs/build/html/quapy.html
index 249ce67..0803b74 100644
--- a/docs/build/html/quapy.html
+++ b/docs/build/html/quapy.html
@@ -1448,28 +1452,6 @@ The Topsoe distance for two discrete distributions of k bins is def
-
              -
              -quapy.functional.adjusted_quantification(prevalence_estim: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], tpr: float, fpr: float, clip: bool = True)[source]
              -

              Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the -positive class p comes down to computing:

              -
              -\[ACC(p) = \frac{ p - fpr }{ tpr - fpr }\]
              -
              -
              Parameters:
              -
                -
              • prevalence_estim – float, the estimated value for the positive class

              • -
              • tpr – float, the true positive rate of the classifier

              • -
              • fpr – float, the false positive rate of the classifier

              • -
              • clip – set to True (default) to clip values that might exceed the range [0,1]

              • -
              -
              -
              Returns:
              -

              float, the adjusted count

              -
              -
              -
              -
              quapy.functional.argmin_prevalence(loss: Callable, n_classes: int, method: Literal['optim_minimize', 'linear_search', 'ternary_search'] = 'optim_minimize')[source]
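A toy sketch of how a loss can be plugged into this dispatcher (illustrative only; the lambda and target vector are invented for the example):
```
import numpy as np
import quapy.functional as F

target = np.asarray([0.2, 0.5, 0.3])
# any callable mapping a prevalence vector to a scalar loss will do
loss = lambda prev: np.abs(prev - target).sum()
F.argmin_prevalence(loss, n_classes=3, method='optim_minimize')  # ~[0.2, 0.5, 0.3]
```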
              @@ -1493,14 +1475,14 @@ positive class p comes down to computing:

              -quapy.functional.as_binary_prevalence(positive_prevalence: Union[float, ndarray], clip_if_necessary: bool = False)[source]
              +quapy.functional.as_binary_prevalence(positive_prevalence: Union[float, _SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], clip_if_necessary: bool = False) ndarray[source]

              Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution.

              Parameters:
                -
              • positive_prevalence – prevalence for the positive class

              • -
              • clip_if_necessary – if True, clips the value in [0,1] in order to guarantee the resulting distribution +

              • positive_prevalence – float or array-like of floats with the prevalence for the positive class

              • +
              • clip_if_necessary (bool) – if True, clips the value in [0,1] in order to guarantee the resulting distribution is valid. If False, it then checks that the value is in the valid range, and raises an error if not.
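A quick sanity sketch of the behaviour described above (hedged: the expected outputs assume the positive class occupies the second position, as elsewhere in quapy):
```
import quapy.functional as F

F.as_binary_prevalence(0.25)                          # -> [0.75, 0.25]
F.as_binary_prevalence(1.2, clip_if_necessary=True)   # clipped to [0., 1.]
```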

              @@ -1512,43 +1494,52 @@ is valid. If False, it then checks that the value is in the valid range, and rai
              -quapy.functional.check_prevalence_vector(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], raise_exception: bool = False, toleranze: float = 1e-08)[source]
              -

              Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.

              -
              -
              Parameters:
              -

              prevalences – the prevalence vector to check

              -
              -
              Returns:
              -

              True if p is valid, False otherwise

              -
              -
              -
              - -
              -
              -quapy.functional.clip_prevalence(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], method: Literal[None, 'none', 'clip', 'project']) ndarray[source]
              -

              Clips the proportions vector prevalences so that it is a valid probability distribution, i.e., all values -are in [0,1] and sum up to 1.

              +quapy.functional.check_prevalence_vector(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], raise_exception: bool = False, tolerance: float = 1e-08, aggr=True)[source] +

Checks that prevalences is a valid prevalence vector, i.e., it contains values in [0,1] and
+the values sum up to 1. In other words, verifies that the prevalence vectors lie in the
+probability simplex.

              Parameters:
                -
              • prevalences – array-like, the proportions vector to be clipped, shape (n_classes,)

              • -
              • method – indicates the method to be used for normalization. -If None or “none”, no normalization is performed. -If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1. -If “project”, the values are projected onto the probability simplex.

              • +
              • prevalences (ArrayLike) – the prevalence vector, or vectors, to check

              • +
              • raise_exception (bool) – whether to raise an exception if the vector (or any of the vectors) does +not lie in the simplex (default False)

              • +
              • tolerance (float) – error tolerance for the check sum(prevalences) - 1 = 0

              • +
              • aggr (bool) – if True (default) returns one single bool (True if all prevalence vectors are valid, +False otherwise), if False returns an array of bool, one for each prevalence vector

              Returns:
              -

              the normalized prevalence vector, shape (n_classes,)

              +

              a single bool True if prevalences is a vector of prevalence values that lies on the simplex, +or False otherwise; alternatively, if prevalences is a matrix of shape (num_vectors, n_classes,) +then it returns one such bool for each prevalence vector
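A small illustration of the aggr flag described above (values chosen for the example):
```
import numpy as np
import quapy.functional as F

F.check_prevalence_vector([0.2, 0.3, 0.5])   # True: lies on the simplex
F.check_prevalence_vector([0.2, 0.2, 0.2])   # False: sums to 0.6
F.check_prevalence_vector(np.asarray([[0.5, 0.5], [1.2, -0.2]]), aggr=False)
# -> array([ True, False])
```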

              +
              +
              +quapy.functional.clip(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
              +

              Clips the values in [0,1] and then applies the L1 normalization.

              +
              +
              Parameters:
              +

              prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

              +
              +
              Returns:
              +

              np.ndarray representing a valid distribution

              +
              +
              +
              + +
              +
              +quapy.functional.condsoftmax(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
              +
              +
              -quapy.functional.counts_from_labels(labels: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], classes: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]])[source]
              -

              Computes the count values from a vector of labels.

              +quapy.functional.counts_from_labels(labels: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], classes: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source] +

              Computes the raw count values from a vector of labels.

              Parameters:
                @@ -1558,7 +1549,8 @@ some classes have no examples.

              Returns:
              -

              an ndarray of shape (len(classes),) with the occurrence counts of each class

              +

              ndarray of shape (len(classes),) with the raw counts for each class, in the same order +as they appear in classes
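For instance (a minimal sketch; note that class 1 is reported with a zero count even though it never occurs in the labels):
```
import numpy as np
import quapy.functional as F

labels = np.asarray([0, 0, 2, 2, 2])
F.counts_from_labels(labels, classes=[0, 1, 2])   # -> array([2, 0, 3])
```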

              @@ -1581,16 +1573,16 @@ divergence from the string name.

              -quapy.functional.get_nprevpoints_approximation(combinations_budget: int, n_classes: int, n_repeats: int = 1)[source]
              +quapy.functional.get_nprevpoints_approximation(combinations_budget: int, n_classes: int, n_repeats: int = 1) int[source]

              Searches for the largest number of (equidistant) prevalence points to define for each of the n_classes classes so that the number of valid prevalence values generated as combinations of prevalence points (points in a n_classes-dimensional simplex) do not exceed combinations_budget.

              Parameters:
                -
              • combinations_budget – integer, maximum number of combinations allowed

              • -
              • n_classes – integer, number of classes

              • -
              • n_repeats – integer, number of repetitions for each prevalence combination

              • +
              • combinations_budget (int) – maximum number of combinations allowed

              • +
              • n_classes (int) – number of classes

              • +
              • n_repeats (int) – number of repetitions for each prevalence combination

              Returns:
              @@ -1599,6 +1591,24 @@ that the number of valid prevalence values generated as combinations of prevalen
              +
              +
              +quapy.functional.l1_norm(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
              +

Applies L1 normalization to prevalences so that it becomes a valid prevalence
+vector. Zero vectors are mapped onto the uniform distribution. Raises an exception if
+the resulting vectors are not valid distributions. This may happen when the original
+prevalence vectors contain negative values. Use the clip normalization function
+instead to avoid this possibility.

              +
              +
              Parameters:
              +

              prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

              +
              +
              Returns:
              +

              np.ndarray representing a valid distribution
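A short sketch contrasting l1_norm with clip on the edge cases mentioned above:
```
import quapy.functional as F

F.l1_norm([2.0, 1.0, 1.0])   # -> [0.5, 0.25, 0.25]
F.l1_norm([0.0, 0.0])        # zero vector -> uniform [0.5, 0.5]
F.clip([-0.1, 0.6, 0.7])     # negatives are clipped first, then L1-normalized
```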

              +
              +
              +
              +
              @@ -1618,35 +1628,27 @@ early methods in quantification literature used it, e.g., HDy). A most powerful
              -
              -
              -quapy.functional.map_onto_probability_simplex(unnormalized_arr: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
              -

              Projects a point onto the probability simplex.

              -

              The code is adapted from Mathieu Blondel’s BSD-licensed -implementation -which is accompanying the paper

              -

              Mathieu Blondel, Akinori Fujino, and Naonori Ueda. -Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, -ICPR 2014, URL

              -
              -
              Parameters:
              -

              unnormalized_arr – point in n-dimensional space, shape (n,)

              -
              -
              Returns:
              -

              projection of v onto (n-1)-dimensional probability simplex, shape (n,)

              -
              -
              -
              -
              -quapy.functional.normalize_prevalence(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]])[source]
              -

              Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in +quapy.functional.normalize_prevalence(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], method='l1')[source] +

              Normalizes a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in cases in which the prevalence values are not all-zeros, and to convert the prevalence values into 1/n_classes in cases in which all values are zero.

              Parameters:
              -

              prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

              +
                +
              • prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

              • +
              • method (str) –

                indicates the normalization method to employ, options are:

                +
                  +
                • l1: applies L1 normalization (default); a 0 vector is mapped onto the uniform prevalence

                • +
                • clip: clip values in [0,1] and then rescales so that the L1 norm is 1

                • +
                • mapsimplex: projects vectors onto the probability simplex. This implementation relies on +Mathieu Blondel’s projection_simplex_sort

                • +
                • softmax: applies softmax to all vectors

                • +
                • condsoftmax: applies softmax only to invalid prevalence vectors

                • +
                +

              • +
              Returns:

              a normalized vector or matrix of prevalence values
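A brief sketch of the dispatch over the methods listed above (applied row-wise to matrices):
```
import numpy as np
import quapy.functional as F

raw = np.asarray([[0.2, 0.9, 0.1],    # sums to 1.2: invalid
                  [0.0, 0.0, 0.0]])   # all zeros: invalid
F.normalize_prevalence(raw, method='l1')          # L1 rescaling; zero row -> uniform
F.normalize_prevalence(raw, method='mapsimplex')  # Euclidean projection onto the simplex
```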

              @@ -1656,7 +1658,7 @@ cases in which all values are zero.

              -quapy.functional.num_prevalence_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1)[source]
              +quapy.functional.num_prevalence_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1) int[source]

              Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if n_prevpoints equally distant prevalence values are generated and n_repeats repetitions are requested. The computation comes down to calculating:

              @@ -1668,14 +1670,15 @@ classes, and r is n_repeats. This solution comes from
              Parameters:
                -
              • n_classes – integer, number of classes

              • -
              • n_prevpoints – integer, number of prevalence points.

              • -
              • n_repeats – integer, number of repetitions for each prevalence combination

              • +
              • n_classes (int) – number of classes

              • +
              • n_prevpoints (int) – number of prevalence points.

              • +
              • n_repeats (int) – number of repetitions for each prevalence combination

              Returns:
              -

              The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the -number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]

              +

The number of possible combinations. For example, if `n_classes`=2, `n_prevpoints`=5, `n_repeats`=1,
+then the number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25],
+and [1.0,0.0]
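As a worked check of the example above, the count coincides with a stars-and-bars computation (a sketch: the closed form is not shown in this hunk, so this is an assumed equivalent form):
```
from math import comb

def n_combinations(n_prevpoints: int, n_classes: int, n_repeats: int = 1) -> int:
    # distribute the probability mass over n_classes bins on a grid of
    # n_prevpoints equidistant values (stars and bars), times the repeats
    return comb(n_prevpoints + n_classes - 2, n_classes - 1) * n_repeats

assert n_combinations(n_prevpoints=5, n_classes=2) == 5   # the five vectors enumerated above
```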

              @@ -1712,7 +1715,8 @@ some classes have no examples.

            Returns:
            -

            an ndarray of shape (len(classes)) with the class prevalence values

            +

            ndarray of shape (len(classes),) with the class proportions for each class, in the same order +as they appear in classes

            @@ -1737,7 +1741,7 @@ converting the vectors of posterior probabilities into class indices, by taking
            -quapy.functional.prevalence_linspace(grid_points: int = 21, repeats: int = 1, smooth_limits_epsilon: float = 0.01)[source]
            +quapy.functional.prevalence_linspace(grid_points: int = 21, repeats: int = 1, smooth_limits_epsilon: float = 0.01) ndarray[source]

            Produces an array of uniformly separated values of prevalence. By default, produces an array of 21 prevalence values, with step 0.05 and with the limits smoothed, i.e.: @@ -1756,31 +1760,73 @@ step 0.05 and with the limits smoothed, i.e.:

            +
            +
            +quapy.functional.projection_simplex_sort(unnormalized_arr: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
            +

            Projects a point onto the probability simplex.

            +

The code is adapted from Mathieu Blondel’s BSD-licensed
+implementation
+(see function projection_simplex_sort in their repo), which accompanies the paper

            +

            Mathieu Blondel, Akinori Fujino, and Naonori Ueda. +Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, +ICPR 2014, URL

            +
            +
            Parameters:
            +

            unnormalized_arr – point in n-dimensional space, shape (n,)

            +
            +
            Returns:
            +

            projection of unnormalized_arr onto the (n-1)-dimensional probability simplex, shape (n,)
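A minimal sketch of the projection (note it is the Euclidean projection, so the result differs from clipping or L1 rescaling):
```
import numpy as np
import quapy.functional as F

v = np.asarray([0.8, 0.6, -0.2])
F.projection_simplex_sort(v)   # -> [0.6, 0.4, 0.0], the closest simplex point to v
```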

            +
            +
            +
            + +
            +
            +quapy.functional.softmax(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]]) ndarray[source]
            +

            Applies the softmax function to all vectors even if the original vectors were valid distributions. +If you want to leave valid vectors untouched, use condsoftmax instead.

            +
            +
            Parameters:
            +

            prevalences – array-like of shape (n_classes,) or of shape (n_samples, n_classes,) with prevalence values

            +
            +
            Returns:
            +

            np.ndarray representing a valid distribution
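A quick contrast between the two variants (approximate values):
```
import numpy as np
import quapy.functional as F

valid = np.asarray([0.3, 0.7])
F.softmax(valid)           # re-maps even valid distributions: ~[0.40, 0.60]
F.condsoftmax(valid)       # already valid, left untouched: [0.3, 0.7]
F.condsoftmax([1.0, 2.0])  # invalid, so the softmax is applied: ~[0.27, 0.73]
```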

            +
            +
            +
            +
            -quapy.functional.solve_adjustment(p_c_cond_y: ndarray, p_c: ndarray, method: Literal['inversion', 'invariant-ratio'], solver: Literal['exact', 'minimize', 'exact-raise', 'exact-cc']) ndarray[source]
            -

            Function that tries to solve for the equation \(P(C)=P(C|Y)P(Y)\), where \(P(C)\) is the vector of -prevalence values obtained by a classify and count, and \(P(C|Y)\) are the class-conditional misclassification -rates of the classifier.

            +quapy.functional.solve_adjustment(class_conditional_rates: ndarray, unadjusted_counts: ndarray, method: Literal['inversion', 'invariant-ratio'], solver: Literal['exact', 'minimize', 'exact-raise', 'exact-cc']) ndarray[source] +

            Function that tries to solve for \(p\) the equation \(q = M p\), where \(q\) is the vector of +unadjusted counts (as estimated, e.g., via classify and count) with \(q_i\) an estimate of +\(P(\hat{Y}=y_i)\), and where \(M\) is the matrix of class-conditional rates with \(M_{ij}\) an +estimate of \(P(\hat{Y}=y_i|Y=y_j)\).

            Parameters:
              -
            • p_c_cond_y – array of shape (n_classes, n_classes,) with entry (c,y) being the estimate -of \(P(C=c|Y=y)\), that is, the probability that an instance that belongs to class \(y\) -ends up being classified as belonging to class \(c\)

            • -
            • p_c – array of shape (n_classes,) containing the prevalence values as estimated by classify and count

            • +
            • class_conditional_rates – array of shape (n_classes, n_classes,) with entry (i,j) being the estimate +of \(P(\hat{Y}=y_i|Y=y_j)\), that is, the probability that an instance that belongs to class \(y_j\) +ends up being classified as belonging to class \(y_i\)

            • +
            • unadjusted_counts – array of shape (n_classes,) containing the unadjusted prevalence values (e.g., as +estimated by CC or PCC)

            • method (str) –

              indicates the adjustment method to be used. Valid options are:

                -
              • ’inversion’: tries to solve the equation \(P(C)=P(C|Y)P(Y)\) as \(P(Y) = P(C|Y)^{-1} P(C)\) where \(P(C|Y)^{-1}\) is the matrix inversion of \(P(C|Y)\). This inversion may not exist in degenerated cases

              • -
              • ’invariant-ratio’: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.

              • +
• inversion: tries to solve the equation \(q = M p\) as \(p = M^{-1} q\) where
+\(M^{-1}\) is the matrix inverse of \(M\). This inversion may not exist in
+degenerate cases.

              • +
              • invariant-ratio: invariant ratio estimator of Vaz et al. 2018, +which replaces the last equation in \(M\) with the normalization condition (i.e., that the sum of +all prevalence values must equal 1).

• solver (str) – the method to use for solving the system of linear equations. Valid options are:

-• ’exact-raise’: tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than n_classes.
-• ’exact-cc’: if the matrix is not of full rank, returns p_c as the estimates, which corresponds to no adjustment (i.e., the classify and count method; see quapy.method.aggregative.CC)
-• ’exact’: deprecated, defaults to ‘exact-cc’
-• ’minimize’: minimizes a loss, so the solution always exists

+• ’exact-raise’: tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly lower than n_classes.
+• ’exact-cc’: if the matrix is not full rank, returns \(q\) (i.e., the unadjusted counts) as the estimates
+• ’exact’: deprecated, defaults to ‘exact-cc’ (will be removed in future versions)
+• ’minimize’: minimizes a loss, so the solution always exists
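By way of illustration, a small sketch of the new signature (keyword names as documented above; the numbers are made up):

```python
import numpy as np
import quapy.functional as F

# hypothetical class-conditional rates M, with M[i, j] = P(Y_hat=y_i | Y=y_j),
# and unadjusted counts q as classify-and-count would produce them
M = np.asarray([[0.8, 0.1],
                [0.2, 0.9]])
q = np.asarray([0.45, 0.55])

p = F.solve_adjustment(class_conditional_rates=M, unadjusted_counts=q,
                       method='inversion', solver='exact-raise')
print(p)  # the vector p solving q = M p, here [0.5, 0.5]
```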

@@ -1788,9 +1834,31 @@ ends up being classified as belonging to class

+quapy.functional.solve_adjustment_binary(prevalence_estim: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], tpr: float, fpr: float, clip: bool = True)[source]

+Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the positive class p comes down to computing:

+\[ACC(p) = \frac{p - fpr}{tpr - fpr}\]
+Parameters:
+• prevalence_estim (float) – the estimated value for the positive class (p in the formula)
+• tpr (float) – the true positive rate of the classifier
+• fpr (float) – the false positive rate of the classifier
+• clip (bool) – set to True (default) to clip values that might exceed the range [0,1]

+Returns:
+float, the adjusted count
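A worked instance of the formula above (function and keyword names as documented in this entry):

```python
from quapy.functional import solve_adjustment_binary

# with tpr=0.8 and fpr=0.2, an observed positive prevalence of 0.45 is
# adjusted to (0.45 - 0.2) / (0.8 - 0.2) = 0.4167 (approximately)
adjusted = solve_adjustment_binary(prevalence_estim=0.45, tpr=0.8, fpr=0.2)
print(adjusted)
```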
-quapy.functional.strprev(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], prec: int = 3)[source]
+quapy.functional.strprev(prevalences: Union[_SupportsArray[dtype], _NestedSequence[_SupportsArray[dtype]], bool, int, float, complex, str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]], prec: int = 3) → str[source]

Returns a string representation for a prevalence vector. E.g.,

>>> strprev([1/3, 2/3], prec=2)
'[0.33, 0.67]'
@@ -1799,8 +1867,8 @@ ends up being classified as belonging to class

Parameters:
-• prevalences – a vector of prevalence values
-• prec – float precision
+• prevalences – array-like of prevalence values
+• prec – int, indicates the float precision (number of decimal values to print)

Returns:

@@ -1809,9 +1877,14 @@ ends up being classified as belonging to class

-quapy.functional.uniform_prevalence_sampling(n_classes: int, size: int = 1)[source]
+quapy.functional.uniform_prevalence_sampling(n_classes: int, size: int = 1) → ndarray[source]

Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>_.
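A quick usage sketch (the output for size>1 is assumed to stack one prevalence vector per row):

```python
import quapy.functional as F

# draw 5 prevalence vectors over 3 classes, uniformly from the unit simplex
samples = F.uniform_prevalence_sampling(n_classes=3, size=5)
print(samples)  # each sampled vector is non-negative and sums to 1
```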

            @@ -1830,7 +1903,7 @@ for sampling uniformly at random from the unit simplex. This implementation is a
-quapy.functional.uniform_simplex_sampling(n_classes: int, size: int = 1)
+quapy.functional.uniform_simplex_sampling(n_classes: int, size: int = 1) → ndarray

            Implements the Kraemer algorithm for sampling uniformly at random from the unit simplex. This implementation is adapted from this post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>_.

            @@ -2811,7 +2884,7 @@ Seeds the child processes to ensure reproducibility when n_jobs>1.

          • seed – the numeric seed

          • asarray – set to True to return a np.ndarray instead of a list

• backend – indicates the backend used for handling parallel work

-• open_args – if True, then the delayed function is called on *args_i, instead of on args_i
+• open_args – if True, then the delayed function is called on *args_i, instead of on args_i
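The open_args flag is easiest to see on a toy function. A sketch, assuming the leading parameters of quapy.util.parallel are (func, args, n_jobs), which this excerpt elides:

```python
from quapy.util import parallel

def add(x, y):
    return x + y

# each element of args is a tuple; with open_args=True the worker calls
# add(*args_i) rather than add(args_i)
sums = parallel(add, [(1, 2), (3, 4)], n_jobs=2, open_args=True)
print(sums)  # the two sums: 3 and 7
```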

diff --git a/docs/build/html/quapy.method.html b/docs/build/html/quapy.method.html
index 053fec3..c71d492 100644
--- a/docs/build/html/quapy.method.html
+++ b/docs/build/html/quapy.method.html
@@ -104,7 +104,7 @@

          quapy.method.aggregative module

          -class quapy.method.aggregative.ACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', clipping: Literal['clip', 'none', 'project'] = 'clip', n_jobs=None)[source]
          +class quapy.method.aggregative.ACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', n_jobs=None)[source]

          Bases: AggregativeCrispQuantifier

Adjusted Classify & Count, the “adjusted” variant of CC, that corrects the predictions of CC

@@ -122,23 +122,31 @@ Alternatively, this set can be specified at fit time by indicating the exact set on which the predictions are to be generated.

• method (str) – adjustment method to be used:

-• ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the \(P(C|Y)\) matrix.
-• ’invariant-ratio’: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.
+• ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the \(P(C|Y)\) matrix.
+• ’invariant-ratio’: invariant ratio estimator of Vaz et al. 2018, which replaces the last equation with the normalization condition.

• solver (str) – indicates the method to use for solving the system of linear equations. Valid options are:

-• clipping (str) – the method to use for normalization.
-  • If None or “none”, no normalization is performed.
-  • If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
-  • If “project”, the values are projected onto the probability simplex.

+• norm (str) – the method to use for normalization.
+  • ’clip’: the values are clipped to the range [0,1] and then L1-normalized.
+  • ’mapsimplex’: projects vectors onto the probability simplex. This implementation relies on Mathieu Blondel’s projection_simplex_sort.
+  • ’condsoftmax’: applies a softmax normalization only to prevalence vectors that lie outside the simplex.

        • n_jobs – number of parallel workers

@@ -146,13 +154,13 @@ on which the predictions are to be generated.

-CLIPPING = ['clip', 'none', 'project', None]
+METHODS = ['inversion', 'invariant-ratio']
-METHODS = ['inversion', 'invariant-ratio']
+NORMALIZATIONS = ['clip', 'mapsimplex', 'condsoftmax', None]
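Given the renaming of clipping to norm, a short usage sketch may help; the toy data and parameter values are illustrative only:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.data import LabelledCollection
from quapy.method.aggregative import ACC

# toy training data: two Gaussian blobs
rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(2, 1, (50, 2))])
y = np.repeat([0, 1], 50)
training = LabelledCollection(X, y)

# the former clipping= keyword is now norm=, accepting 'clip', 'mapsimplex',
# 'condsoftmax', or None
acc = ACC(LogisticRegression(), val_split=5, solver='minimize', norm='condsoftmax')
acc.fit(training)
print(acc.quantify(X))  # estimated prevalence vector
```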
          @@ -212,7 +220,7 @@ document that belongs to yj ends up being classified as belonging to yi

          classmethod newInvariantRatioEstimation(classifier: BaseEstimator, val_split=5, n_jobs=None)[source]

Constructs a quantifier that implements the Invariant Ratio Estimator of
-Vaz et al. 2018 <https://jmlr.org/papers/v20/18-456.html>_. This amounts
+Vaz et al. 2018. This amounts
to setting method to ‘invariant-ratio’ and clipping to ‘project’.

          Parameters:
          @@ -1046,7 +1054,7 @@ probabilities are independent of each other, meaning that, in general, they do n
          -class quapy.method.aggregative.PACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', clipping: Literal['clip', 'none', 'project'] = 'clip', n_jobs=None)[source]
          +class quapy.method.aggregative.PACC(classifier: BaseEstimator, val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', n_jobs=None)[source]

          Bases: AggregativeSoftQuantifier

          Probabilistic Adjusted Classify & Count, the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.

@@ -1062,23 +1070,31 @@ for k). Alternatively, this set can be specified at fit time by indicating the exact set on which the predictions are to be generated.

• method (str) – adjustment method to be used:

-• ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the P(C|Y) matrix.
-• ’invariant-ratio’: invariant ratio estimator of Vaz et al., which replaces the last equation with the normalization condition.
+• ’inversion’: matrix inversion method based on the matrix equality \(P(C)=P(C|Y)P(Y)\), which tries to invert the P(C|Y) matrix.
+• ’invariant-ratio’: invariant ratio estimator of Vaz et al., which replaces the last equation with the normalization condition.

• solver (str) – the method to use for solving the system of linear equations. Valid options are:

-• clipping (str) – the method to use for normalization.
-  • If None or “none”, no normalization is performed.
-  • If “clip”, the values are clipped to the range [0,1] and normalized, so they sum up to 1.
-  • If “project”, the values are projected onto the probability simplex.

+• norm (str) – the method to use for normalization.
+  • ’clip’: the values are clipped to the range [0,1] and then L1-normalized.
+  • ’mapsimplex’: projects vectors onto the probability simplex. This implementation relies on Mathieu Blondel’s projection_simplex_sort.
+  • ’condsoftmax’: applies a softmax normalization only to prevalence vectors that lie outside the simplex.

        • n_jobs – number of parallel workers
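The same renaming applies to PACC; a one-line construction sketch (parameter values illustrative):

```python
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

# PACC accepts the same renamed parameter; here projecting onto the simplex
pacc = PACC(LogisticRegression(), solver='minimize', method='inversion', norm='mapsimplex')
```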

diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 7f2792c..57c32b1 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
(regenerated Sphinx search index: the minified Search.setIndex({...}) blob is omitted here)
"sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Welcome to QuaPy\u2019s documentation!": [[0, "welcome-to-quapy-s-documentation"]], "Installation": [[0, "installation"]], "GitHub": [[0, "github"]], "Contents": [[0, "contents"]], "Indices and tables": [[0, "indices-and-tables"]], "quapy": [[1, "quapy"]], "Submodules": [[3, "submodules"], [4, "submodules"], [2, "submodules"], [5, "submodules"]], "Module contents": [[3, "module-quapy.classification"], [4, "module-quapy.data"], [2, "module-quapy"], [5, "module-quapy.method"]], "quapy.classification package": [[3, "quapy-classification-package"]], "quapy.classification.calibration module": [[3, "module-quapy.classification.calibration"]], "quapy.classification.methods module": [[3, "module-quapy.classification.methods"]], "quapy.classification.neural module": [[3, "module-quapy.classification.neural"]], "quapy.classification.svmperf module": [[3, "module-quapy.classification.svmperf"]], "quapy.data package": [[4, "quapy-data-package"]], "quapy.data.base module": [[4, "module-quapy.data.base"]], "quapy.data.datasets module": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing module": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader module": [[4, "module-quapy.data.reader"]], "quapy package": [[2, "quapy-package"]], "Subpackages": [[2, "subpackages"]], "quapy.error module": [[2, "module-quapy.error"]], "quapy.evaluation module": [[2, "module-quapy.evaluation"]], "quapy.functional module": [[2, "module-quapy.functional"]], "quapy.model_selection module": [[2, "module-quapy.model_selection"]], "quapy.plot module": [[2, "module-quapy.plot"]], "quapy.protocol module": [[2, "module-quapy.protocol"]], "quapy.util module": [[2, "module-quapy.util"]], "quapy.method package": [[5, "quapy-method-package"]], "quapy.method.aggregative module": [[5, "module-quapy.method.aggregative"]], "quapy.method.base module": [[5, "module-quapy.method.base"]], "quapy.method.meta module": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative module": [[5, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[2, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol"]], "artificialprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.ArtificialPrevalenceProtocol"]], "configstatus (class in quapy.model_selection)": [[2, "quapy.model_selection.ConfigStatus"]], "domainmixer (class in quapy.protocol)": [[2, "quapy.protocol.DomainMixer"]], "error (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.ERROR"]], "earlystop (class in quapy.util)": [[2, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[2, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[2, "quapy.functional.HellingerDistance"]], "invalid (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.INVALID"]], "iterateprotocol (class in quapy.protocol)": [[2, "quapy.protocol.IterateProtocol"]], "npp (class in quapy.protocol)": [[2, "quapy.protocol.NPP"]], "naturalprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.NaturalPrevalenceProtocol"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[2, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": 
[[2, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "success (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.SUCCESS"]], "status (class in quapy.model_selection)": [[2, "quapy.model_selection.Status"]], "timeout (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.TIMEOUT"]], "topsoedistance() (in module quapy.functional)": [[2, "quapy.functional.TopsoeDistance"]], "upp (class in quapy.protocol)": [[2, "quapy.protocol.UPP"]], "uniformprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.UniformPrevalenceProtocol"]], "absolute_error() (in module quapy.error)": [[2, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[2, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[2, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[2, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[2, "quapy.error.ae"]], "argmin_prevalence() (in module quapy.functional)": [[2, "quapy.functional.argmin_prevalence"]], "as_binary_prevalence() (in module quapy.functional)": [[2, "quapy.functional.as_binary_prevalence"]], "best_model() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[2, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[2, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[2, "quapy.functional.check_prevalence_vector"]], "clip_prevalence() (in module quapy.functional)": [[2, "quapy.functional.clip_prevalence"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "counts_from_labels() (in module quapy.functional)": [[2, "quapy.functional.counts_from_labels"]], "create_if_not_exist() (in module quapy.util)": [[2, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[2, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[2, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[2, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[2, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[2, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate"]], "evaluate_on_samples() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate_on_samples"]], "evaluation_report() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluation_report"]], "expand_grid() (in module quapy.model_selection)": [[2, "quapy.model_selection.expand_grid"]], "f1_error() (in module quapy.error)": [[2, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[2, "quapy.error.f1e"]], "failed() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.failed"]], "fit() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[2, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[2, 
"quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_divergence() (in module quapy.functional)": [[2, "quapy.functional.get_divergence"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[2, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[2, "quapy.util.get_quapy_home"]], "group_params() (in module quapy.model_selection)": [[2, "quapy.model_selection.group_params"]], "kld() (in module quapy.error)": [[2, "quapy.error.kld"]], "linear_search() (in module quapy.functional)": [[2, "quapy.functional.linear_search"]], "mae() (in module quapy.error)": [[2, "quapy.error.mae"]], "map_onto_probability_simplex() (in module quapy.functional)": [[2, "quapy.functional.map_onto_probability_simplex"]], "map_parallel() (in module quapy.util)": [[2, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_absolute_error"]], "mean_normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_absolute_error"]], "mean_normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_relative_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[2, "quapy.error.mkld"]], "mnae() (in module quapy.error)": [[2, "quapy.error.mnae"]], "mnkld() (in module quapy.error)": [[2, "quapy.error.mnkld"]], "mnrae() (in module quapy.error)": [[2, "quapy.error.mnrae"]], "module": [[2, "module-quapy"], [2, "module-quapy.error"], [2, "module-quapy.evaluation"], [2, "module-quapy.functional"], [2, "module-quapy.model_selection"], [2, "module-quapy.plot"], [2, "module-quapy.protocol"], [2, "module-quapy.util"], [5, "module-quapy.method"], [5, "module-quapy.method._kdey"], [5, "module-quapy.method._neural"], [5, "module-quapy.method._threshold_optim"], [5, "module-quapy.method.aggregative"], [5, "module-quapy.method.base"], [5, "module-quapy.method.meta"], [5, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[2, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[2, "quapy.error.mse"]], "nae() (in module quapy.error)": [[2, "quapy.error.nae"]], "nkld() (in module quapy.error)": [[2, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[2, "quapy.functional.normalize_prevalence"]], "normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_absolute_error"]], "normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_relative_absolute_error"]], "nrae() (in module quapy.error)": [[2, "quapy.error.nrae"]], "num_prevalence_combinations() (in module quapy.functional)": [[2, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "optim_minimize() (in module quapy.functional)": [[2, "quapy.functional.optim_minimize"]], "parallel() (in module quapy.util)": [[2, "quapy.util.parallel"]], "parallel_unpack() (in module quapy.util)": [[2, "quapy.util.parallel_unpack"]], "pickled_resource() (in module quapy.util)": [[2, 
"quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[2, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[2, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[2, "module-quapy"]], "quapy.error": [[2, "module-quapy.error"]], "quapy.evaluation": [[2, "module-quapy.evaluation"]], "quapy.functional": [[2, "module-quapy.functional"]], "quapy.model_selection": [[2, "module-quapy.model_selection"]], "quapy.plot": [[2, "module-quapy.plot"]], "quapy.protocol": [[2, "module-quapy.protocol"]], "quapy.util": [[2, "module-quapy.util"]], "rae() (in module quapy.error)": [[2, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[2, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[2, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[2, "quapy.error.smooth"]], "solve_adjustment() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment"]], "strprev() (in module quapy.functional)": [[2, "quapy.functional.strprev"]], "success() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.success"]], "temp_seed() (in module quapy.util)": [[2, "quapy.util.temp_seed"]], "timeout() (in module quapy.util)": [[2, "quapy.util.timeout"]], "total() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[2, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.iterateprotocol method)": [[2, "quapy.protocol.IterateProtocol.total"]], "total() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.total"]], "total() 
(quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_simplex_sampling"]], "acc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativecrispquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeCrispQuantifier"]], "aggregativemedianestimator (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator"]], "aggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeQuantifier"]], "aggregativesoftquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeSoftQuantifier"]], "bandwidth_method (quapy.method._kdey.kdebase attribute)": [[5, "quapy.method._kdey.KDEBase.BANDWIDTH_METHOD"]], "basequantifier (class in quapy.method.base)": [[5, "quapy.method.base.BaseQuantifier"]], "bayesiancc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BayesianCC"]], "binaryaggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[5, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.CC"]], "clipping (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.CLIPPING"]], "classifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ClassifyAndCount"]], "dmx (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DMx"]], "dmy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DMy"]], "distributionmatchingx (in module quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DistributionMatchingX"]], "distributionmatchingy (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.DistributionMatchingY"]], "dys (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[5, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[5, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[5, "quapy.method.meta.EHDy"]], "em() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.EMQ"]], "emq_bcts() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EMQ_BCTS"]], "epacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[5, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "hdx() (quapy.method.non_aggregative.dmx class method)": [[5, "quapy.method.non_aggregative.DMx.HDx"]], "hdy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[5, 
"quapy.method.aggregative.HellingerDistanceY"]], "kdebase (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEBase"]], "kdeycs (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyCS"]], "kdeyhd (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyHD"]], "kdeyml (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyML"]], "max (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.MAX_ITER"]], "methods (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.METHODS"]], "ms (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS"]], "ms2 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "medianestimator (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator"]], "medianestimator2 (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator2"]], "onevsall (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetModule"]], "quanettrainer (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetTrainer"]], "readme (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.ReadMe"]], "sld (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.SMM"]], "solvers (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.SOLVERS"]], "t50 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.T50"]], "thresholdoptimization (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[5, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.X"]], "aggregate() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregate"]], "aggregate() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregate"]], "aggregate() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregate"]], "aggregate() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregate"]], "aggregate() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate"]], "aggregate() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() 
(quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregate"]], "aggregate_with_threshold() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold"]], "aggregation_fit() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregation_fit"]], "aggregative (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method._neural.quanettrainer property)": [[5, "quapy.method._neural.QuaNetTrainer.classes_"]], "classes_ (quapy.method.aggregative.aggregativequantifier 
property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[5, "quapy.method.base.OneVsAllGeneric.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classifier_fit_predict() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[5, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "clean_checkpoint() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir"]], "condition() (quapy.method._threshold_optim.max method)": [[5, "quapy.method._threshold_optim.MAX.condition"]], "condition() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.condition"]], "condition() (quapy.method._threshold_optim.t50 method)": [[5, "quapy.method._threshold_optim.T50.condition"]], "condition() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.condition"]], "condition() (quapy.method._threshold_optim.x method)": [[5, "quapy.method._threshold_optim.X.condition"]], "device (quapy.method._neural.quanetmodule property)": [[5, "quapy.method._neural.QuaNetModule.device"]], "discard() (quapy.method._threshold_optim.ms2 method)": [[5, "quapy.method._threshold_optim.MS2.discard"]], "discard() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.discard"]], "ensemblefactory() (in module quapy.method.meta)": [[5, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.fit"]], "fit() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.binaryaggregativequantifier method)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.fit"]], "fit() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.fit"]], "fit() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.fit"]], "fit() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "fit() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.fit"]], "forward() 
(quapy.method._neural.quanetmodule method)": [[5, "quapy.method._neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[5, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_conditional_probability_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_conditional_probability_samples"]], "get_kde_function() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_kde_function"]], "get_mixture_components() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_mixture_components"]], "get_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.get_params"]], "get_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.get_params"]], "get_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.get_params"]], "get_prevalence_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_prevalence_samples"]], "get_probability_distribution() (in module quapy.method.meta)": [[5, "quapy.method.meta.get_probability_distribution"]], "gram_matrix_mix_sum() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.gram_matrix_mix_sum"]], "mae_loss() (in module quapy.method._neural)": [[5, "quapy.method._neural.mae_loss"]], "neg_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.neg_label"]], "newelm() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newELM"]], "newinvariantratioestimation() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.newInvariantRatioEstimation"]], "newonevsall() (in module quapy.method.base)": [[5, "quapy.method.base.newOneVsAll"]], "newsvmae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMAE"]], "newsvmkld() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMKLD"]], "newsvmq() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMQ"]], "newsvmrae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMRAE"]], "pdf() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.pdf"]], "pos_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.pos_label"]], "predict_proba() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.quantify"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier 
method)": [[5, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.quantify"]], "quantify() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.quantify"]], "quantify() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quantify() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.quantify"]], "quapy.method": [[5, "module-quapy.method"]], "quapy.method._kdey": [[5, "module-quapy.method._kdey"]], "quapy.method._neural": [[5, "module-quapy.method._neural"]], "quapy.method._threshold_optim": [[5, "module-quapy.method._threshold_optim"]], "quapy.method.aggregative": [[5, "module-quapy.method.aggregative"]], "quapy.method.base": [[5, "module-quapy.method.base"]], "quapy.method.meta": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative": [[5, "module-quapy.method.non_aggregative"]], "sample_from_posterior() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.sample_from_posterior"]], "set_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.set_params"]], "set_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.set_params"]], "set_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.set_params"]], "std_constrained_linear_ls() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.std_constrained_linear_ls"]], "training (quapy.method._neural.quanetmodule attribute)": [[5, "quapy.method._neural.QuaNetModule.training"]], "val_split (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split"]], "val_split_ (quapy.method.aggregative.aggregativequantifier attribute)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split_"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"i": [0, 2, 3, 4, 5], "python": [0, 4], "base": [0, 1, 2, 3], "open": [0, 2, 4], "sourc": [0, 2, 3, 4, 5], "framework": [0, 5], "quantif": [0, 2, 3, 4, 5], "thi": [0, 2, 3, 4, 5], "contain": [0, 2, 3, 4, 5], "api": 0, "modul": [0, 1], "includ": [0, 4, 5], "pip": [0, 5], "host": 0, "http": [0, 2, 4, 5], "com": [0, 2], "hlt": 0, "isti": 0, "packag": [0, 1], "subpackag": [0, 1], "classif": [0, 1, 2, 4, 5], "submodul": [0, 1], "calibr": [0, 1, 2], 
"bctscalibr": [0, 2, 3], "nbvscalibr": [0, 2, 3], "recalibratedprobabilisticclassifi": [0, 2, 3], "recalibratedprobabilisticclassifierbas": [0, 2, 3], "classes_": [0, 2, 3, 4, 5], "fit": [0, 1, 2, 3, 4, 5], "fit_cv": [0, 2, 3], "fit_tr_val": [0, 2, 3], "predict": [0, 1, 2, 3, 5], "predict_proba": [0, 2, 3, 5], "tscalibr": [0, 2, 3], "vscalibr": [0, 2, 3], "method": [0, 1, 2], "lowranklogisticregress": [0, 2, 3], "get_param": [0, 1, 2, 3, 5], "set_param": [0, 1, 2, 3, 5], "transform": [0, 2, 3, 4, 5], "neural": [0, 1, 2, 4, 5], "cnnnet": [0, 2, 3, 5], "document_embed": [0, 2, 3], "train": [0, 2, 3, 4, 5], "vocabulary_s": [0, 2, 3, 4, 5], "lstmnet": [0, 2, 3], "neuralclassifiertrain": [0, 2, 3, 5], "devic": [0, 2, 3, 5], "reset_net_param": [0, 2, 3], "textclassifiernet": [0, 2, 3], "dimens": [0, 2, 3, 4, 5], "forward": [0, 2, 3, 5], "xavier_uniform": [0, 2, 3], "torchdataset": [0, 2, 3], "asdataload": [0, 2, 3], "svmperf": [0, 1, 2, 5], "decision_funct": [0, 2, 3, 5], "valid_loss": [0, 2, 3, 5], "data": [0, 1, 2, 3, 5], "dataset": [0, 1, 2, 3, 5], "splitstratifi": [0, 2, 4], "binari": [0, 2, 3, 4, 5], "kfcv": [0, 2, 3, 4], "load": [0, 2, 4, 5], "n_class": [0, 2, 3, 4, 5], "reduc": [0, 2, 4], "stat": [0, 2, 4], "train_test": [0, 2, 4], "labelledcollect": [0, 2, 4, 5], "x": [0, 2, 3, 4, 5], "xp": [0, 2, 4], "xy": [0, 2, 4], "count": [0, 2, 4, 5], "join": [0, 2, 4], "p": [0, 2, 3, 4, 5], "preval": [0, 2, 3, 4, 5], "sampl": [0, 1, 2, 3, 4, 5], "sampling_from_index": [0, 2, 4], "sampling_index": [0, 2, 4], "split_random": [0, 2, 4], "split_stratifi": [0, 2, 4], "uniform_sampl": [0, 2, 4], "uniform_sampling_index": [0, 2, 4], "y": [0, 2, 3, 4, 5], "fetch_ifcb": [0, 2, 4], "fetch_ucibinarydataset": [0, 2, 4], "fetch_ucibinarylabelledcollect": [0, 2, 4], "fetch_ucimulticlassdataset": [0, 2, 4], "fetch_ucimulticlasslabelledcollect": [0, 2, 4], "fetch_lequa2022": [0, 2, 4], "fetch_review": [0, 2, 4, 5], "fetch_twitt": [0, 2, 4], "warn": [0, 2, 4, 5], "preprocess": [0, 1, 2, 5], "indextransform": [0, 2, 4], "add_word": [0, 2, 4], "fit_transform": [0, 2, 4], "index": [0, 2, 3, 4, 5], "reduce_column": [0, 2, 4], "standard": [0, 2, 3, 4, 5], "text2tfidf": [0, 2, 4], "reader": [0, 1, 2], "binar": [0, 2, 4], "from_csv": [0, 2, 4], "from_spars": [0, 2, 4], "from_text": [0, 2, 4], "reindex_label": [0, 2, 4], "aggreg": [0, 1, 2], "acc": [0, 1, 2, 5], "clip": [0, 1, 2, 5], "solver": [0, 2, 5], "aggregation_fit": [0, 2, 5], "getptecondestim": [0, 2, 5], "newinvariantratioestim": [0, 2, 5], "adjustedclassifyandcount": [0, 2, 5], "aggregativecrispquantifi": [0, 2, 5], "aggregativemedianestim": [0, 2, 5], "quantifi": [0, 1, 2, 4, 5], "aggregativequantifi": [0, 2, 5], "classifi": [0, 2, 3, 5], "classifier_fit_predict": [0, 2, 5], "val_split": [0, 2, 3, 5], "val_split_": [0, 2, 5], "aggregativesoftquantifi": [0, 2, 5], "bayesiancc": [0, 2, 5], "get_conditional_probability_sampl": [0, 2, 5], "get_prevalence_sampl": [0, 2, 5], "sample_from_posterior": [0, 2, 5], "binaryaggregativequantifi": [0, 2, 5], "neg_label": [0, 2, 5], "pos_label": [0, 2, 5], "cc": [0, 2, 5], "classifyandcount": [0, 2, 5], "dmy": [0, 2, 5], "distributionmatchingi": [0, 2, 5], "dy": [0, 2, 5], "emq": [0, 2, 5], "em": [0, 2, 5], "emq_bct": [0, 2, 5], "epsilon": [0, 2, 5], "max_it": [0, 2, 5], "expectationmaximizationquantifi": [0, 2, 5], "hdy": [0, 2, 5], "hellingerdistancei": [0, 2, 5], "onevsallaggreg": [0, 2, 5], "pacc": [0, 2, 5], "pcc": [0, 2, 5], "probabilisticadjustedclassifyandcount": [0, 2, 5], "probabilisticclassifyandcount": [0, 2, 5], 
"sld": [0, 2, 5], "smm": [0, 2, 5], "newelm": [0, 2, 5], "newsvma": [0, 2, 5], "newsvmkld": [0, 2, 5], "newsvmq": [0, 2, 5], "newsvmra": [0, 2, 5], "kdebas": [0, 2, 5], "bandwidth_method": [0, 2, 5], "get_kde_funct": [0, 2, 5], "get_mixture_compon": [0, 2, 5], "pdf": [0, 2, 5], "kdeyc": [0, 2, 5], "gram_matrix_mix_sum": [0, 2, 5], "kdeyhd": [0, 2, 5], "kdeyml": [0, 2, 5], "quanetmodul": [0, 2, 5], "quanettrain": [0, 2, 5], "clean_checkpoint": [0, 2, 5], "clean_checkpoint_dir": [0, 2, 5], "mae_loss": [0, 2, 5], "max": [0, 2, 5], "condit": [0, 2, 5], "m": [0, 2, 5], "ms2": [0, 2, 5], "discard": [0, 2, 5], "t50": [0, 2, 5], "thresholdoptim": [0, 2, 5], "aggregate_with_threshold": [0, 2, 5], "basequantifi": [0, 2, 5], "binaryquantifi": [0, 2, 5], "onevsal": [0, 2, 5], "onevsallgener": [0, 2, 5], "newonevsal": [0, 2, 5], "meta": [0, 1, 2], "eacc": [0, 2, 5], "ecc": [0, 2, 5], "eemq": [0, 2, 5], "ehdi": [0, 2, 5], "epacc": [0, 2, 5], "ensembl": [0, 2, 4, 5], "valid_polici": [0, 2, 5], "probabilist": [0, 2, 3, 5], "medianestim": [0, 2, 5], "medianestimator2": [0, 2, 5], "ensemblefactori": [0, 2, 5], "get_probability_distribut": [0, 2, 5], "non_aggreg": [0, 1, 2], "dmx": [0, 2, 5], "hdx": [0, 2, 5], "distributionmatchingx": [0, 2, 5], "maximumlikelihoodprevalenceestim": [0, 2, 5], "readm": [0, 2, 5], "std_constrained_linear_l": [0, 2, 5], "error": [0, 1, 3, 5], "absolute_error": [0, 1, 2], "acc_error": [0, 1, 2], "ae": [0, 1, 2], "f1_error": [0, 1, 2], "f1e": [0, 1, 2], "from_nam": [0, 1, 2], "kld": [0, 1, 2, 3, 5], "mae": [0, 1, 2, 3, 5], "mean_absolute_error": [0, 1, 2], "mean_normalized_absolute_error": [0, 1, 2], "mean_normalized_relative_absolute_error": [0, 1, 2], "mean_relative_absolute_error": [0, 1, 2], "mkld": [0, 1, 2, 5], "mnae": [0, 1, 2, 5], "mnkld": [0, 1, 2, 5], "mnrae": [0, 1, 2, 5], "mrae": [0, 1, 2, 3, 5], "mse": [0, 1, 2, 5], "nae": [0, 1, 2], "nkld": [0, 1, 2, 3, 5], "normalized_absolute_error": [0, 1, 2], "normalized_relative_absolute_error": [0, 1, 2], "nrae": [0, 1, 2], "rae": [0, 1, 2], "relative_absolute_error": [0, 1, 2], "se": [0, 1, 2], "smooth": [0, 1, 2], "evalu": [0, 1, 3, 4, 5], "evaluate_on_sampl": [0, 1, 2], "evaluation_report": [0, 1, 2], "function": [0, 1, 3, 4, 5], "hellingerdist": [0, 1, 2], "topsoedist": [0, 1, 2], "adjusted_quantif": [], "argmin_preval": [0, 1, 2], "as_binary_preval": [0, 1, 2], "check_prevalence_vector": [0, 1, 2], "clip_preval": [], "counts_from_label": [0, 1, 2], "get_diverg": [0, 1, 2], "get_nprevpoints_approxim": [0, 1, 2], "linear_search": [0, 1, 2], "map_onto_probability_simplex": [], "normalize_preval": [0, 1, 2], "num_prevalence_combin": [0, 1, 2], "optim_minim": [0, 1, 2, 5], "prevalence_from_label": [0, 1, 2], "prevalence_from_prob": [0, 1, 2], "prevalence_linspac": [0, 1, 2], "solve_adjust": [0, 1, 2], "strprev": [0, 1, 2], "uniform_prevalence_sampl": [0, 1, 2], "uniform_simplex_sampl": [0, 1, 2], "model_select": [0, 1, 5], "configstatu": [0, 1, 2], "fail": [0, 1, 2], "success": [0, 1, 2], "gridsearchq": [0, 1, 2, 5], "best_model": [0, 1, 2], "statu": [0, 1, 2], "invalid": [0, 1, 2], "timeout": [0, 1, 2], "cross_val_predict": [0, 1, 2], "expand_grid": [0, 1, 2], "group_param": [0, 1, 2], "plot": [0, 1], "binary_bias_bin": [0, 1, 2], "binary_bias_glob": [0, 1, 2], "binary_diagon": [0, 1, 2], "brokenbar_supremacy_by_drift": [0, 1, 2], "error_by_drift": [0, 1, 2], "protocol": [0, 1, 4, 5], "app": [0, 1, 2, 5], "prevalence_grid": [0, 1, 2], "samples_paramet": [0, 1, 2], "total": [0, 1, 2], "abstractprotocol": [0, 1, 2, 4], 
"abstractstochasticseededprotocol": [0, 1, 2], "collat": [0, 1, 2], "random_st": [0, 1, 2, 4, 5], "artificialprevalenceprotocol": [0, 1, 2], "domainmix": [0, 1, 2], "iterateprotocol": [0, 1, 2], "npp": [0, 1, 2], "naturalprevalenceprotocol": [0, 1, 2], "onlabelledcollectionprotocol": [0, 1, 2], "return_typ": [0, 1, 2], "get_col": [0, 1, 2], "get_labelled_collect": [0, 1, 2], "on_preclassified_inst": [0, 1, 2], "upp": [0, 1, 2], "uniformprevalenceprotocol": [0, 1, 2], "util": [0, 1, 3, 4], "earlystop": [0, 1, 2], "create_if_not_exist": [0, 1, 2], "create_parent_dir": [0, 1, 2], "download_fil": [0, 1, 2], "download_file_if_not_exist": [0, 1, 2], "get_quapy_hom": [0, 1, 2], "map_parallel": [0, 1, 2], "parallel": [0, 1, 2, 3, 4, 5], "parallel_unpack": [0, 1, 2], "pickled_resourc": [0, 1, 2], "save_text_fil": [0, 1, 2], "temp_se": [0, 1, 2], "search": [0, 2, 5], "page": 0, "content": 1, "implement": [2, 3, 4, 5], "measur": [2, 5], "us": [2, 3, 4, 5], "prev": [2, 4], "prevs_hat": 2, "comput": [2, 5], "absolut": [2, 5], "between": [2, 3, 5], "two": [2, 4, 5], "vector": [2, 3, 4, 5], "hat": [2, 5], "frac": [2, 5], "1": [2, 3, 4, 5], "mathcal": [2, 5], "sum_": [2, 5], "where": [2, 3, 4, 5], "ar": [2, 3, 4, 5], "class": [2, 3, 4, 5], "interest": 2, "paramet": [2, 3, 4, 5], "arrai": [2, 3, 4, 5], "like": [2, 3, 4, 5], "shape": [2, 3, 4, 5], "true": [2, 3, 4, 5], "valu": [2, 3, 4, 5], "return": [2, 3, 4, 5], "y_true": 2, "y_pred": 2, "term": [2, 3, 4, 5], "accuraci": [2, 5], "The": [2, 3, 4, 5], "tp": 2, "tn": 2, "fp": 2, "fn": 2, "stand": [2, 5], "posit": [2, 4, 5], "fals": [2, 3, 4, 5], "neg": [2, 5], "respect": [2, 5], "label": [2, 3, 4, 5], "f1": [2, 3], "simpli": [2, 5], "macro": 2, "f_1": 2, "e": [2, 3, 4, 5], "harmon": 2, "mean": [2, 3, 4, 5], "precis": 2, "recal": 2, "defin": [2, 3, 4, 5], "2tp": 2, "averag": [2, 4, 5], "each": [2, 3, 4, 5], "categori": 2, "independ": [2, 5], "err_nam": 2, "get": [2, 3, 4, 5], "an": [2, 3, 4, 5], "from": [2, 3, 4, 5], "its": [2, 3, 5], "name": [2, 3, 4, 5], "g": [2, 4, 5], "string": [2, 4, 5], "callabl": [2, 4, 5], "request": [2, 4, 5], "ep": 2, "none": [2, 3, 4, 5], "kullback": [2, 5], "leibler": [2, 5], "diverg": [2, 5], "distribut": [2, 4, 5], "d_": 2, "kl": 2, "log": [2, 4, 5], "factor": 2, "see": [2, 3, 4, 5], "case": [2, 3, 4, 5], "which": [2, 3, 4, 5], "zero": 2, "typic": [2, 3, 4, 5], "set": [2, 3, 4, 5], "2t": 2, "t": [2, 3, 5], "size": [2, 3, 4, 5], "If": [2, 4, 5], "taken": [2, 3, 4, 5], "environ": [2, 5], "variabl": [2, 4], "sample_s": [2, 5], "ha": [2, 3, 4, 5], "thu": [2, 3, 5], "beforehand": 2, "across": [2, 5], "pair": 2, "n_sampl": [2, 3], "normal": [0, 2, 4, 5], "rel": [2, 4, 5], "squar": [2, 5], "z_": 2, "2": [2, 4, 5], "min_": [2, 5], "math": [2, 5], "2frac": 2, "underlin": 2, "displaystyl": 2, "model": [2, 3, 4, 5], "error_metr": 2, "union": [2, 4, 5], "str": [2, 4, 5], "aggr_speedup": 2, "bool": [2, 3, 5], "auto": 2, "verbos": [2, 3, 4, 5], "accord": [2, 3, 4, 5], "specif": [2, 5], "gener": [2, 3, 4, 5], "one": [2, 4, 5], "metric": [2, 5], "instanc": [2, 3, 4, 5], "object": [2, 3, 4, 5], "also": [2, 3, 5], "speed": [2, 5], "up": [2, 3, 5], "can": [2, 4, 5], "run": [2, 4, 5], "charg": [2, 4], "repres": [2, 4, 5], "": [2, 3, 4, 5], "qp": [2, 4, 5], "itself": [2, 5], "whether": [2, 3, 4, 5], "appli": [2, 3, 4, 5], "forc": 2, "even": 2, "number": [2, 3, 4, 5], "origin": [2, 4, 5], "collect": [2, 3, 4, 5], "act": 2, "larger": [2, 4, 5], "than": [2, 3, 4, 5], "default": [2, 3, 4, 5], "let": [2, 5], "decid": [2, 4], "conveni": 2, "deactiv": 2, 
"boolean": [2, 4, 5], "show": [2, 3, 4, 5], "inform": [2, 3, 4, 5], "stdout": 2, "score": [2, 3, 4, 5], "singl": [2, 5], "float": [2, 3, 4, 5], "iter": [2, 4, 5], "given": [2, 3, 4, 5], "list": [2, 3, 4, 5], "report": [2, 5], "panda": 2, "datafram": 2, "more": [2, 4, 5], "column": [2, 4], "estim": [2, 3, 4, 5], "mani": [2, 5], "have": [2, 4, 5], "been": [2, 3, 4, 5], "indic": [2, 3, 4, 5], "displai": [2, 3], "everi": [2, 5], "via": [2, 3, 5], "central": 2, "all": [2, 3, 4, 5], "process": [2, 4], "endow": 2, "optim": [2, 3, 5], "larg": 2, "onli": [2, 3, 4, 5], "come": [2, 4, 5], "down": [2, 4, 5], "onc": [2, 4], "over": [2, 5], "instead": [2, 4, 5], "raw": [2, 4], "so": [2, 3, 4, 5], "never": 2, "call": [2, 4, 5], "again": 2, "behaviour": 2, "obtain": [2, 3, 5], "carri": [2, 4, 5], "out": [2, 3, 4, 5], "overal": 2, "need": [2, 4, 5], "exce": 2, "undertaken": 2, "issu": [2, 5], "tupl": [2, 4, 5], "true_prev": 2, "estim_prev": 2, "element": [2, 4, 5], "ndarrai": [2, 4, 5], "q": [2, 3, 5], "hellingh": 2, "distanc": [2, 5], "hd": [2, 5], "discret": [2, 5], "k": [2, 3, 4, 5], "bin": [2, 5], "sqrt": [2, 5], "p_i": 2, "q_i": 2, "real": [2, 3, 4, 5], "1e": [2, 3, 5], "20": [2, 5], "topso": [2, 5], "left": [2, 4, 5], "right": [2, 4, 5], "prevalence_estim": 2, "_supportsarrai": 2, "dtype": [2, 4], "_nestedsequ": 2, "int": [2, 4, 5], "complex": 2, "byte": 2, "tpr": [2, 5], "fpr": [2, 5], "adjust": [2, 5], "rate": [2, 3, 5], "might": [2, 4], "rang": [2, 5], "0": [2, 3, 4, 5], "loss": [2, 3, 5], "liter": [2, 5], "ternary_search": [0, 1, 2], "minim": [2, 5], "strategi": 2, "possibl": [2, 5], "scipi": [2, 4], "linear": [2, 5], "problem": [2, 4, 5], "space": [2, 3, 5], "01": [2, 3, 5], "02": 2, "ternari": [2, 5], "yet": 2, "np": [2, 4, 5], "positive_preval": 2, "clip_if_necessari": 2, "helper": 2, "order": [2, 4, 5], "guarante": [2, 4, 5], "result": [2, 5], "valid": [2, 3, 4, 5], "check": 2, "rais": [2, 5], "raise_except": 2, "toleranz": [], "08": 2, "sum": [2, 5], "otherwis": [2, 4, 5], "project": [2, 5], "proport": [2, 3, 4, 5], "probabl": [2, 3, 5], "perform": [2, 3, 5], "thei": [2, 5], "onto": [2, 5], "simplex": [2, 5], "n_instanc": [2, 3, 5], "correctli": 2, "when": [2, 3, 4, 5], "some": [2, 4, 5], "exampl": [2, 3, 4, 5], "len": 2, "occurr": 4, "receiv": 2, "argument": [2, 4, 5], "That": 2, "alreadi": 2, "tri": [2, 5], "instanti": [2, 3, 5], "correspond": [2, 4, 5], "combinations_budget": 2, "n_repeat": 2, "largest": 2, "equidist": 2, "point": [2, 4, 5], "combin": [2, 5], "dimension": [2, 3, 4, 5], "do": [2, 3, 4, 5], "integ": [2, 3, 4, 5], "maximum": [2, 3, 5], "allow": [2, 3, 4, 5], "repetit": 2, "less": [2, 4, 5], "best": [2, 3, 5], "explor": 2, "step": [2, 5], "ineffici": 2, "ad": 2, "complet": [2, 5], "earli": [2, 3, 5], "literatur": 2, "A": [2, 3, 4, 5], "most": [2, 4, 5], "power": 2, "altern": [2, 5], "found": [2, 3, 4, 5], "unnormalized_arr": 2, "code": [2, 3], "adapt": [2, 3], "mathieu": [2, 5], "blondel": [2, 5], "bsd": 2, "licens": 2, "accompani": 2, "paper": [2, 3, 5], "akinori": 2, "fujino": 2, "naonori": 2, "ueda": 2, "scale": [2, 3, 5], "multiclass": [2, 4, 5], "support": [2, 4, 5], "machin": [2, 3], "euclidean": 2, "icpr": 2, "2014": 2, "url": 2, "n": [2, 3, 5], "v": [2, 3, 5], "matrix": [2, 5], "consist": [2, 3, 4, 5], "l1": [2, 5], "convert": [2, 3, 4, 5], "n_prevpoint": 2, "equal": [2, 5], "distant": 2, "calcul": [2, 5], "binom": 2, "c": [2, 3, 4, 5], "time": [2, 4, 5], "r": [2, 4, 5], "mass": 2, "block": 2, "alloc": [2, 3], "solut": [2, 5], "star": 2, "bar": 2, "For": [2, 4, 5], 
"5": [2, 3, 4, 5], "25": [2, 3, 5], "75": [2, 5], "50": [2, 5], "yield": [2, 4, 5], "smallest": 2, "lost": 2, "constrain": [2, 4], "slsqp": 2, "routin": [2, 4, 5], "posterior": [2, 3, 5], "crisp": [2, 5], "decis": [2, 3, 5], "take": [2, 4, 5], "argmax": 2, "grid_point": 2, "21": 2, "repeat": 2, "smooth_limits_epsilon": 2, "produc": 2, "uniformli": 2, "separ": [2, 4], "By": 2, "05": [2, 5], "limit": [2, 5], "10": [2, 3, 5], "15": [2, 4], "90": 2, "95": 2, "99": 2, "interv": 2, "quantiti": 2, "add": [2, 4], "subtract": [2, 4], "p_c_cond_i": [], "p_c": 5, "invers": [2, 5], "invari": [2, 5], "ratio": [2, 5], "exact": [2, 4, 5], "solv": [2, 5], "equat": [2, 5], "misclassif": 5, "entri": [2, 5], "being": [2, 5], "belong": [2, 5], "end": [2, 5], "option": [2, 4, 5], "mai": 2, "exist": 2, "degener": 2, "vaz": [2, 5], "et": [2, 3, 4, 5], "al": [2, 3, 4, 5], "replac": [2, 4, 5], "last": [2, 3, 4, 5], "system": [2, 5], "rank": [2, 3, 5], "strictli": [2, 5], "full": [2, 4, 5], "deprec": [2, 5], "alwai": [2, 5], "prec": 2, "3": [2, 3, 4, 5], "represent": [2, 3, 5], "33": 2, "67": 2, "kraemer": 2, "algorithm": [2, 4, 5], "random": [2, 4, 5], "unit": [2, 5], "post": 2, "stackexchang": 2, "question": 2, "3227": 2, "uniform": [2, 4, 5], "_": [2, 4, 5], "param": [2, 3, 5], "msg": 2, "param_grid": [2, 5], "dict": [2, 4, 5], "type": [2, 4, 5], "refit": 2, "n_job": [2, 3, 4, 5], "raise_error": 2, "grid": [2, 5], "target": [2, 3, 5], "orient": [2, 5], "hyperparamet": [2, 5], "dictionari": [2, 3, 4, 5], "kei": [2, 4], "ones": [2, 4, 5], "those": [2, 3, 5], "quantification_error": 2, "whole": [2, 3], "chosen": 2, "ignor": [2, 4, 5], "gen": 2, "establish": 2, "timer": 2, "second": [2, 4], "configur": [2, 5], "test": [2, 3, 4, 5], "whenev": 2, "longer": [2, 5], "timeouterror": 2, "except": [2, 5], "bound": [2, 5], "ani": [2, 3, 4, 5], "mark": 2, "goe": 2, "howev": 2, "valueerror": 2, "through": 2, "after": [2, 5], "hyper": [2, 3, 5], "learn": [2, 3, 4, 5], "select": [2, 4, 5], "self": [2, 3, 4, 5], "deep": [2, 5], "unus": [2, 3], "contanin": 2, "enum": 2, "enumer": 2, "4": [2, 4], "nfold": [2, 4], "akin": [2, 5], "scikit": [2, 3, 4, 5], "fold": [2, 4, 5], "cross": [2, 3, 4, 5], "seed": [2, 4, 5], "reproduc": [2, 4], "expand": 2, "100": [2, 3, 4, 5], "b": [2, 4, 5], "print": [2, 3, 4], "assign": [2, 4], "partit": [2, 3], "anoth": [2, 5], "que": 2, "method_nam": 2, "pos_class": [2, 4], "titl": 2, "nbin": [2, 5], "colormap": 2, "matplotlib": 2, "color": 2, "listedcolormap": 2, "vertical_xtick": 2, "legend": 2, "savepath": 2, "box": 2, "local": 2, "bia": [2, 3, 5], "sign": 2, "minu": 2, "differ": [2, 4, 5], "classs": 2, "experi": [2, 4], "compon": [2, 3, 5], "cm": 2, "tab10": 2, "secondari": 2, "path": [2, 3, 4, 5], "save": [2, 4], "shown": 2, "global": 2, "show_std": 2, "train_prev": 2, "method_ord": 2, "diagon": 2, "along": [2, 5], "axi": 2, "describ": [2, 5], "henc": [2, 4, 5], "It": [2, 4], "though": 2, "other": [2, 4, 5], "prefer": 2, "deviat": [2, 4], "band": 2, "inconveni": 2, "compar": 2, "high": [2, 5], "leyend": 2, "hightlight": 2, "conduct": 2, "same": [2, 4, 5], "impos": 2, "associ": 2, "tr_prev": [2, 5], "n_bin": [2, 5], "isomer": 2, "x_error": 2, "y_error": 2, "ttest_alpha": 2, "005": 2, "tail_density_threshold": 2, "top": [2, 5], "region": 2, "shift": [2, 3, 5], "form": [2, 4, 5], "broken": 2, "chart": 2, "either": 2, "follow": [2, 4, 5], "hold": [2, 5], "ii": 2, "statist": [2, 5], "significantli": 2, "side": 2, "confid": 2, "made": [2, 4, 5], "isometr": 2, "percentil": 2, "divid": 2, "amount": [2, 5], 
"abov": 2, "consid": [2, 3, 4, 5], "involv": 2, "similar": [2, 5], "threshold": [2, 5], "densiti": [2, 5], "below": [2, 4], "tail": 2, "avoid": 2, "outlier": 2, "error_nam": 2, "show_dens": 2, "show_legend": 2, "logscal": 2, "vline": 2, "especi": 2, "cumberson": 2, "gain": 2, "understand": 2, "about": [2, 4, 5], "how": [2, 4, 5], "fare": 2, "prior": [2, 5], "spectrum": 2, "low": [2, 3], "regim": 2, "highlight": 2, "vertic": 2, "dot": 2, "line": 2, "n_preval": 2, "sanity_check": 2, "10000": [2, 5], "sample_prev": 2, "artifici": 2, "drawn": [2, 4], "extract": [2, 4, 5], "copi": [2, 4], "replic": 2, "sequenc": 2, "user": 2, "skip": 2, "labelled_collect": 2, "exhaust": 2, "depend": [2, 5], "11": 2, "9": 2, "implicit": 2, "return_constrained_dim": 2, "rest": [2, 3, 4], "note": [2, 4], "quit": 2, "obvious": 2, "doe": [2, 5], "determinist": 2, "anywher": 2, "multipli": 2, "realiz": 2, "necessari": [2, 5], "abstract": [2, 3, 4, 5], "parent": 2, "known": [2, 5], "procedur": 2, "enforc": 2, "fulli": 2, "In": [2, 3, 4, 5], "make": [2, 5], "extend": [2, 5], "input": [2, 3, 4, 5], "arg": [2, 3, 4, 5], "prepar": 2, "accommod": 2, "desir": [2, 4], "output": [2, 3, 4, 5], "format": [2, 4, 5], "befor": [2, 3, 4, 5], "inherit": 2, "custom": [2, 4], "addit": 2, "adher": 2, "properti": [2, 3, 4, 5], "determin": 2, "serv": [2, 4], "alia": [2, 4, 5], "domaina": 2, "domainb": 2, "mixture_point": 2, "mixtur": [2, 5], "domain": 2, "control": 2, "preserv": [2, 4], "draw": [2, 5], "specifi": [2, 3, 4, 5], "should": [2, 3, 4, 5], "zip": 2, "veri": 2, "simpl": [2, 5], "previous": [2, 5], "natur": 2, "therefor": 2, "approxim": [2, 3], "classmethod": [2, 4, 5], "pre_classif": 2, "in_plac": 2, "modifi": 2, "version": [2, 3], "pre": 2, "advanc": 2, "hard": [2, 3, 5], "modif": 2, "place": [2, 4], "new": [2, 4], "variant": [2, 5], "reli": [2, 5], "cover": [2, 3], "entir": 2, "sens": 2, "unlik": 2, "endeavour": 2, "intract": 2, "patienc": [2, 3, 5], "lower_is_bett": 2, "stop": [2, 3, 5], "network": [2, 3, 4, 5], "epoch": [2, 3, 5], "7": [2, 3, 5], "improv": [2, 3, 5], "best_epoch": 2, "best_scor": 2, "consecut": [2, 3, 4, 5], "monitor": 2, "obtaind": 2, "held": [2, 3, 5], "split": [2, 3, 4, 5], "wors": 2, "far": [2, 3, 4], "flag": 2, "keep": [2, 4], "track": 2, "seen": [2, 5], "wa": [2, 4, 5], "o": 2, "makedir": 2, "exist_ok": 2, "dir": [2, 5], "subdir": 2, "anotherdir": 2, "creat": [2, 5], "file": [2, 3, 4, 5], "txt": 2, "archive_filenam": 2, "download": [2, 4], "destin": 2, "filenam": 2, "dowload": 2, "home": [2, 4], "directori": [2, 3, 4, 5], "perman": 2, "quapy_data": 2, "func": 2, "slice": 2, "item": 2, "work": [2, 4, 5], "pass": [2, 3, 5], "worker": [2, 3, 4, 5], "asarrai": 2, "backend": [2, 5], "loki": [2, 5], "wrapper": [2, 3, 4, 5], "multiprocess": [2, 5], "delai": 2, "args_i": 2, "silent": [2, 5], "child": 2, "ensur": 2, "numer": [2, 4, 5], "handl": 2, "open_arg": 2, "pickle_path": 2, "generation_func": 2, "fast": [2, 4], "reus": [2, 4], "resourc": 2, "next": [2, 3, 4], "invok": [2, 4], "pickl": [2, 4, 5], "def": 2, "some_arrai": 2, "mock": [2, 3], "rand": 2, "my_arrai": 2, "pkl": 2, "first": [2, 4, 5], "text": [2, 3, 4, 5], "disk": [2, 4], "miss": 2, "context": 2, "tempor": [2, 3], "without": [2, 4], "outer": 2, "numpi": [2, 3], "current": [2, 3, 4, 5], "state": 2, "random_se": 2, "within": [2, 5], "launch": 2, "close": [2, 4, 5], "start_msg": 2, "end_msg": 2, "sleep": 2, "begin": 2, "correct": [3, 5], "temperatur": [3, 5], "bct": [3, 5], "abstent": 3, "alexandari": [3, 5], "stratifi": [3, 4, 5], "retrain": 3, 
"afterward": [3, 5], "No": [3, 5], "nbv": [3, 5], "re": [3, 4], "kundaj": 3, "shrikumar": 3, "2020": 3, "novemb": 3, "likelihood": [3, 5], "beat": [3, 5], "intern": [3, 4, 5], "confer": [3, 4], "pp": 3, "222": 3, "232": 3, "pmlr": 3, "baseestim": [3, 5], "calibratorfactori": 3, "n_featur": [3, 5], "manner": [3, 5], "val": [3, 4], "These": [3, 5], "n_compon": 3, "kwarg": [3, 4, 5], "embed": [3, 5], "requir": [3, 4, 5], "quanet": [3, 5], "easili": 3, "sklearn": [3, 4, 5], "decomposit": 3, "truncatedsvd": 3, "while": [3, 4, 5], "linear_model": 3, "logisticregress": [3, 5], "princip": 3, "retain": [3, 5], "logist": [3, 5], "regress": 3, "map": [2, 3, 5], "length": [3, 4], "eventu": [3, 4], "unalt": 3, "emb": 3, "embedding_s": 3, "hidden_s": 3, "256": 3, "repr_siz": 3, "kernel_height": 3, "stride": 3, "pad": [3, 4], "drop_p": 3, "convolut": 3, "vocabulari": [3, 4], "word": [2, 3, 4, 5], "hidden": [3, 5], "document": [3, 4, 5], "kernel": [3, 5], "token": [3, 4], "drop": 3, "dropout": [3, 5], "layer": [3, 5], "batch": 3, "torch": [3, 5], "dataload": 3, "tensor": 3, "n_dimens": [3, 5], "lstm_class_nlay": 3, "long": 3, "short": 3, "memori": 3, "lstm": [3, 5], "net": 3, "lr": [3, 5], "001": [3, 5], "weight_decai": 3, "200": 3, "batch_siz": 3, "64": [3, 5], "batch_size_test": 3, "512": [3, 5], "padding_length": 3, "300": 3, "cuda": [3, 5], "checkpointpath": 3, "checkpoint": [3, 5], "classifier_net": 3, "dat": 3, "weight": [3, 4], "decai": 3, "wait": 3, "cpu": [3, 5], "enabl": 3, "gpu": [3, 5], "store": [3, 4, 5], "vocab_s": 3, "reiniti": 3, "trainer": 3, "learner": [3, 5], "disjoint": 3, "embed_s": 3, "nn": 3, "pad_length": 3, "xavier": 3, "initi": [3, 5], "shuffl": [3, 4], "dynam": [3, 4, 5], "longest": 3, "shorter": 3, "svmperf_bas": [3, 5], "host_fold": 3, "classifiermixin": 3, "svm": [3, 4, 5], "perf": [3, 5], "thorsten": 3, "joachim": [3, 5], "patch": [3, 5], "instal": [3, 5], "further": [3, 4, 5], "detail": [3, 4, 5], "refer": [3, 4], "esuli": [3, 4, 5], "2015": [3, 5], "barranquero": [3, 5], "svm_perf_learn": 3, "svm_perf_classifi": 3, "trade": [3, 5], "off": [3, 5], "margin": [3, 5], "std": 3, "avail": [3, 4, 5], "qacc": 3, "qf1": 3, "qgm": 3, "tmp": 3, "automat": 3, "delet": 3, "multivari": 3, "12": 3, "26": 3, "27": 3, "13": 3, "22": [3, 4], "23": 3, "24": 3, "textual": 4, "train_siz": 4, "6": 4, "conform": 4, "nrepeat": 4, "around": [4, 5], "round": 4, "train_path": 4, "test_path": 4, "loader_func": 4, "loader_kwarg": 4, "read": 4, "must": [2, 4, 5], "loader": 4, "n_train": 4, "n_test": 4, "quick": 4, "kindl": [4, 5], "tfidf": 4, "min_df": [4, 5], "tr": 4, "3821": 4, "te": 4, "21591": 4, "spars": 4, "csr": 4, "csr_matrix": 4, "featur": [4, 5], "4403": 4, "081": 4, "919": 4, "063": 4, "937": 4, "dedic": 4, "attach": 4, "them": [4, 5], "sever": 4, "infer": 4, "linearsvc": 4, "my_collect": 4, "codefram": 4, "both": 4, "frequenc": [4, 5], "actual": [4, 5], "lead": 4, "empti": 4, "sinc": [4, 5], "met": 4, "whose": [4, 5], "train_prop": 4, "randomli": 4, "stratif": 4, "greater": 4, "single_sample_train": 4, "for_model_select": 4, "data_hom": 4, "ifcb": 4, "zenodo": 4, "pleas": 4, "link": 4, "publicli": 4, "whoi": 4, "plankton": 4, "repo": [2, 4], "script": [4, 5], "gonz\u00e1lez": [4, 5], "basic": [4, 5], "precomput": 4, "togeth": 4, "individu": 4, "30": [4, 5], "86": 4, "286": 4, "dump": 4, "leav": [2, 4], "quay_data": 4, "test_gen": 4, "_ifcb": 4, "ifcbtrainsamplesfromdir": 4, "seri": 4, "ifcbtestsampl": 4, "dataset_nam": 4, "test_split": 4, "uci": 4, "p\u00e9rez": [4, 5], "g\u00e1llego": [4, 
5], "quevedo": 4, "j": [2, 4, 5], "del": 4, "coz": 4, "2017": [4, 5], "characteriz": 4, "chang": 4, "studi": 4, "fusion": 4, "34": [4, 5], "87": 4, "castano": 4, "2019": [4, 5], "task": 4, "45": 4, "predefin": 4, "fetch_ucilabelledcollect": 4, "access": [4, 5], "uci_dataset": 4, "ml": [4, 5], "repositori": 4, "adopt": 4, "5fcvx2": 4, "x2": 4, "import": [4, 5], "yeast": 4, "archiv": 4, "ic": 4, "edu": 4, "criteria": 4, "1000": [4, 5], "suit": 4, "ucimlrepo": 4, "dry": 4, "bean": 4, "uci_multiclass_dataset": 4, "offici": 4, "provid": [4, 5], "lequa": 4, "competit": 4, "brief": 4, "t1a": 4, "t1b": 4, "t2a": 4, "t2b": 4, "sentiment": 4, "28": 4, "merchandis": 4, "product": 4, "we": 4, "moreo": [4, 5], "sebastiani": [4, 5], "f": [4, 5], "sperduti": 4, "2022": [4, 5], "overview": 4, "clef": 4, "descript": 4, "lequa2022_experi": 4, "py": 4, "folder": [4, 5], "guid": 4, "val_gen": 4, "_lequa2022": 4, "samplesfromdir": 4, "subclass": [4, 5], "review": 4, "recurr": 4, "proceed": [4, 5], "27th": 4, "acm": [4, 5], "knowledg": 4, "manag": 4, "2018": [2, 4, 5], "reviews_sentiment_dataset": 4, "hp": 4, "imdb": 4, "matric": 4, "minimun": 4, "kept": 4, "faster": 4, "subsequ": 4, "twitter": 4, "gao": [4, 5], "w": 4, "tweet": 4, "analysi": 4, "social": 4, "mining6": 4, "19": 4, "2016": [4, 5], "semeval13": 4, "semeval14": 4, "semeval15": 4, "share": 4, "twitter_sentiment_datasets_train": 4, "twitter_sentiment_datasets_test": 4, "gasp": 4, "hcr": 4, "omd": 4, "sander": 4, "semeval16": 4, "sst": 4, "wb": 4, "devel": 4, "style": 4, "id": 4, "would": [4, 5], "countvector": 4, "keyword": [4, 5], "nogap": 4, "regardless": 4, "special": 4, "codifi": 4, "unknown": 4, "surfac": 4, "assert": 4, "gap": 4, "preced": 4, "inplac": [4, 5], "To": 4, "uniqu": 4, "rare": 4, "occur": 4, "unk": 4, "minimum": [4, 5], "org": [4, 5], "stabl": 4, "feature_extract": 4, "html": 4, "subtyp": 4, "spmatrix": 4, "remov": [2, 4, 5], "present": 4, "least": 4, "infrequ": 4, "aka": [4, 5], "z": 4, "sublinear_tf": 4, "part": 4, "scall": 4, "tf": 4, "counter": 4, "tfidfvector": 4, "categor": 4, "toward": [4, 5], "whcih": 4, "had": 4, "encod": 4, "utf": 4, "8": [4, 5], "csv": 4, "feat1": 4, "feat2": 4, "featn": 4, "covari": [4, 5], "express": 4, "col": 4, "row": 4, "class2int": 4, "collet": 4, "fomart": 4, "progress": 4, "sentenc": 4, "classnam": 4, "u1": 4, "springer": [], "articl": [], "1007": [], "s10618": [], "008": [], "0097": [], "invert": 5, "l2": 5, "norm": [2, 5], "ax": 5, "better": 5, "consult": 5, "buns": 5, "On": 5, "multi": 5, "extens": 5, "2nd": 5, "workshop": 5, "applic": 5, "lq": 5, "ecml": 5, "pkdd": 5, "grenobl": 5, "franc": 5, "classif_predict": 5, "y_": 5, "construct": 5, "jmlr": [], "v20": [], "18": [], "456": [], "abc": 5, "base_quantifi": 5, "median": 5, "parameter": 5, "parllel": 5, "subobject": 5, "well": 5, "nest": 5, "pipelin": 5, "latter": 5, "__": 5, "updat": 5, "reason": 5, "phase": 5, "classification_fit": 5, "maintain": 5, "attribut": 5, "give": 5, "fit_classifi": 5, "predict_on": 5, "outsid": 5, "remaind": 5, "expect": 5, "non": 5, "soft": 5, "num_warmup": 5, "500": 5, "num_sampl": 5, "mcmc_seed": 5, "bayesian": 5, "rather": 5, "diagnos": 5, "degeneraci": 5, "visibl": 5, "confus": 5, "uncertainti": 5, "extra": 5, "bay": 5, "warmup": 5, "mcmc": 5, "sampler": 5, "One": 5, "noth": 5, "here": 5, "cdf": 5, "match": 5, "helling": 5, "sought": 5, "choic": 5, "channel": 5, "proper": 5, "ch": 5, "particular": 5, "di": 5, "dij": 5, "fraction": 5, "th": 5, "tol": 5, "find": 5, "got": 5, "dl": 5, "doi": 5, "1145": 5, 
"3219819": 5, "3220059": 5, "histogram": 5, "toler": [2, 5], "classif_posterior": 5, "exact_train_prev": 5, "recalib": 5, "maxim": 5, "saeren": 5, "latinn": 5, "decaesteck": 5, "mutual": 5, "recurs": 5, "wai": 5, "until": 5, "converg": 5, "heurist": 5, "propos": 5, "recalibr": 5, "meant": 5, "messag": 5, "observ": 5, "posterior_prob": 5, "0001": 5, "reach": 5, "loop": 5, "ir": 5, "accordingli": 5, "unlabel": 5, "binary_quantifi": 5, "parallel_backend": 5, "prevel": 5, "emploi": [2, 5], "joblib": 5, "help": 5, "elm": 5, "cannot": 5, "temp": 5, "dure": 5, "resp": 5, "simplif": 5, "conceptu": 5, "equival": 5, "explicit": 5, "famili": 5, "structur": 5, "purpos": 5, "svmperf_hom": 5, "properli": 5, "underli": 5, "2021": 5, "_kdei": 5, "common": 5, "ancestor": 5, "kde": 5, "scott": 5, "silverman": 5, "bandwidth": 5, "wrap": 5, "kerneldens": 5, "evalut": 5, "kdei": 5, "cauchi": 5, "schwarz": 5, "author": 5, "mont": 5, "carlo": 5, "approach": 5, "alpha": 5, "delta": 5, "d": 5, "boldsymbol": 5, "q_": 5, "widetild": 5, "u": 5, "p_": 5, "alpha_i": 5, "l": 5, "_i": 5, "p_x": 5, "x_i": 5, "h": 5, "datapoint": 5, "center": 5, "mathrm": 5, "dx": 5, "2dx": 5, "admit": 5, "montecarlo_tri": 5, "disntac": 5, "_f": 5, "trial": 5, "x_1": 5, "ldot": 5, "x_t": 5, "sim_": 5, "iid": 5, "criterion": 5, "mathbb": 5, "_neural": 5, "doc_embedding_s": 5, "stats_siz": 5, "lstm_hidden_s": 5, "lstm_nlayer": 5, "ff_layer": 5, "1024": 5, "bidirect": 5, "qdrop_p": 5, "order_bi": 5, "cell": 5, "dens": 5, "connect": 5, "ff": 5, "sort": 5, "doc_embed": 5, "doc_posterior": 5, "overridden": 5, "although": 5, "recip": 5, "former": 5, "care": 5, "regist": 5, "hook": 5, "n_epoch": 5, "tr_iter_per_poch": 5, "va_iter_per_poch": 5, "checkpointdir": 5, "checkpointnam": 5, "pytorch": 5, "advantag": 5, "cnn": 5, "estim_preval": 5, "anyth": 5, "40": 5, "66": 5, "ground": 5, "truth": 5, "_threshold_optim": 5, "forman": 5, "2006": 5, "2008": 5, "look": 5, "goal": 5, "bring": 5, "stabil": 5, "denomin": 5, "sweep": 5, "closest": 5, "choos": 5, "deliv": 5, "interpret": 5, "complement": 5, "param_mod_sel": 5, "param_model_sel": 5, "red_siz": 5, "min_po": 5, "polici": 5, "av": 5, "max_sample_s": 5, "ptr": 5, "member": 5, "preliminari": 5, "final": 5, "recomput": 5, "static": 5, "compat": 5, "recommend": 5, "gridsearchcv": 5, "base_quantifier_class": 5, "factori": 5, "unifi": 5, "interfac": 5, "logspac": 5, "class_weight": 5, "balanc": 5, "110": 5, "setup": 5, "mimick": 5, "castro": 5, "alaiz": 5, "rodr\u00edguez": 5, "alegr": 5, "2013": 5, "nfeat": 5, "dissimilar": 5, "mlpe": 5, "lazi": 5, "assum": 5, "put": 5, "assumpion": 5, "irrespect": 5, "lower": [2, 5], "estimant": 5, "bootstrap_tri": 5, "bootstrap_rang": 5, "bagging_tri": 5, "bagging_rang": 5, "vectorizer_kwarg": 5, "class_cond_x": 5, "hat_yi": 5, "yj": 5, "yi": 5, "projection_simplex_sort": [0, 1, 2, 5], "ip_if_necessari": [], "appear": 2, "decim": 2, "formula": 2, "condsoftmax": [0, 1, 2, 5], "l1_norm": [0, 1, 2], "softmax": [0, 1, 2, 5], "solve_adjustment_binari": [0, 1, 2], "aggr": 2, "verifi": 2, "li": 2, "arraylik": 2, "lie": [2, 5], "num_vector": 2, "becom": 2, "happen": 2, "rescal": 2, "mapsimplex": [2, 5], "were": 2, "you": 2, "want": 2, "untouch": 2, "class_conditional_r": 2, "unadjusted_count": 2, "unadjust": 2, "y_i": 2, "m_": 2, "ij": 2, "y_j": 2, "futur": 2}, "objects": {"": [[2, 0, 0, "-", "quapy"]], "quapy": [[3, 0, 0, "-", "classification"], [4, 0, 0, "-", "data"], [2, 0, 0, "-", "error"], [2, 0, 0, "-", "evaluation"], [2, 0, 0, "-", "functional"], [5, 0, 0, "-", 
"method"], [2, 0, 0, "-", "model_selection"], [2, 0, 0, "-", "plot"], [2, 0, 0, "-", "protocol"], [2, 0, 0, "-", "util"]], "quapy.classification": [[3, 0, 0, "-", "calibration"], [3, 0, 0, "-", "methods"], [3, 0, 0, "-", "neural"], [3, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[3, 1, 1, "", "BCTSCalibration"], [3, 1, 1, "", "NBVSCalibration"], [3, 1, 1, "", "RecalibratedProbabilisticClassifier"], [3, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [3, 1, 1, "", "TSCalibration"], [3, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[3, 2, 1, "", "classes_"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "fit_cv"], [3, 3, 1, "", "fit_tr_val"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[3, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural": [[3, 1, 1, "", "CNNnet"], [3, 1, 1, "", "LSTMnet"], [3, 1, 1, "", "NeuralClassifierTrainer"], [3, 1, 1, "", "TextClassifierNet"], [3, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "get_params"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[3, 2, 1, "", "device"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict"], [3, 3, 1, "", "predict_proba"], [3, 3, 1, "", "reset_net_params"], [3, 3, 1, "", "set_params"], [3, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[3, 3, 1, "", "dimensions"], [3, 3, 1, "", "document_embedding"], [3, 3, 1, "", "forward"], [3, 3, 1, "", "get_params"], [3, 3, 1, "", "predict_proba"], [3, 4, 1, "", "training"], [3, 2, 1, "", "vocabulary_size"], [3, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[3, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[3, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[3, 3, 1, "", "decision_function"], [3, 3, 1, "", "fit"], [3, 3, 1, "", "predict"], [3, 4, 1, "", "valid_losses"]], "quapy.data": [[4, 0, 0, "-", "base"], [4, 0, 0, "-", "datasets"], [4, 0, 0, "-", "preprocessing"], [4, 0, 0, "-", "reader"]], "quapy.data.base": [[4, 1, 1, "", "Dataset"], [4, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[4, 3, 1, "", "SplitStratified"], [4, 2, 1, "", "binary"], [4, 2, 1, "", "classes_"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 3, 1, "", "reduce"], [4, 3, 1, "", "stats"], [4, 2, 1, "", "train_test"], [4, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[4, 2, 1, "", "X"], [4, 2, 1, "", "Xp"], [4, 2, 1, "", "Xy"], [4, 2, 1, "", "binary"], [4, 3, 1, "", "counts"], [4, 3, 1, "", "join"], [4, 3, 1, "", "kFCV"], [4, 3, 1, "", "load"], [4, 2, 1, "", "n_classes"], [4, 2, 1, "", "p"], [4, 3, 1, "", "prevalence"], [4, 3, 1, "", "sampling"], [4, 3, 1, "", "sampling_from_index"], [4, 3, 1, "", "sampling_index"], [4, 3, 1, "", "split_random"], [4, 3, 1, "", "split_stratified"], [4, 3, 1, "", "stats"], [4, 3, 1, "", "uniform_sampling"], [4, 3, 1, "", 
"uniform_sampling_index"], [4, 2, 1, "", "y"]], "quapy.data.datasets": [[4, 5, 1, "", "fetch_IFCB"], [4, 5, 1, "", "fetch_UCIBinaryDataset"], [4, 5, 1, "", "fetch_UCIBinaryLabelledCollection"], [4, 5, 1, "", "fetch_UCIMulticlassDataset"], [4, 5, 1, "", "fetch_UCIMulticlassLabelledCollection"], [4, 5, 1, "", "fetch_lequa2022"], [4, 5, 1, "", "fetch_reviews"], [4, 5, 1, "", "fetch_twitter"], [4, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[4, 1, 1, "", "IndexTransformer"], [4, 5, 1, "", "index"], [4, 5, 1, "", "reduce_columns"], [4, 5, 1, "", "standardize"], [4, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[4, 3, 1, "", "add_word"], [4, 3, 1, "", "fit"], [4, 3, 1, "", "fit_transform"], [4, 3, 1, "", "transform"], [4, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[4, 5, 1, "", "binarize"], [4, 5, 1, "", "from_csv"], [4, 5, 1, "", "from_sparse"], [4, 5, 1, "", "from_text"], [4, 5, 1, "", "reindex_labels"]], "quapy.error": [[2, 5, 1, "", "absolute_error"], [2, 5, 1, "", "acc_error"], [2, 5, 1, "", "acce"], [2, 5, 1, "", "ae"], [2, 5, 1, "", "f1_error"], [2, 5, 1, "", "f1e"], [2, 5, 1, "", "from_name"], [2, 5, 1, "", "kld"], [2, 5, 1, "", "mae"], [2, 5, 1, "", "mean_absolute_error"], [2, 5, 1, "", "mean_normalized_absolute_error"], [2, 5, 1, "", "mean_normalized_relative_absolute_error"], [2, 5, 1, "", "mean_relative_absolute_error"], [2, 5, 1, "", "mkld"], [2, 5, 1, "", "mnae"], [2, 5, 1, "", "mnkld"], [2, 5, 1, "", "mnrae"], [2, 5, 1, "", "mrae"], [2, 5, 1, "", "mse"], [2, 5, 1, "", "nae"], [2, 5, 1, "", "nkld"], [2, 5, 1, "", "normalized_absolute_error"], [2, 5, 1, "", "normalized_relative_absolute_error"], [2, 5, 1, "", "nrae"], [2, 5, 1, "", "rae"], [2, 5, 1, "", "relative_absolute_error"], [2, 5, 1, "", "se"], [2, 5, 1, "", "smooth"]], "quapy.evaluation": [[2, 5, 1, "", "evaluate"], [2, 5, 1, "", "evaluate_on_samples"], [2, 5, 1, "", "evaluation_report"], [2, 5, 1, "", "prediction"]], "quapy.functional": [[2, 5, 1, "", "HellingerDistance"], [2, 5, 1, "", "TopsoeDistance"], [2, 5, 1, "", "argmin_prevalence"], [2, 5, 1, "", "as_binary_prevalence"], [2, 5, 1, "", "check_prevalence_vector"], [2, 5, 1, "", "clip"], [2, 5, 1, "", "condsoftmax"], [2, 5, 1, "", "counts_from_labels"], [2, 5, 1, "", "get_divergence"], [2, 5, 1, "", "get_nprevpoints_approximation"], [2, 5, 1, "", "l1_norm"], [2, 5, 1, "", "linear_search"], [2, 5, 1, "", "normalize_prevalence"], [2, 5, 1, "", "num_prevalence_combinations"], [2, 5, 1, "", "optim_minimize"], [2, 5, 1, "", "prevalence_from_labels"], [2, 5, 1, "", "prevalence_from_probabilities"], [2, 5, 1, "", "prevalence_linspace"], [2, 5, 1, "", "projection_simplex_sort"], [2, 5, 1, "", "softmax"], [2, 5, 1, "", "solve_adjustment"], [2, 5, 1, "", "solve_adjustment_binary"], [2, 5, 1, "", "strprev"], [2, 5, 1, "", "ternary_search"], [2, 5, 1, "", "uniform_prevalence_sampling"], [2, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[5, 0, 0, "-", "_kdey"], [5, 0, 0, "-", "_neural"], [5, 0, 0, "-", "_threshold_optim"], [5, 0, 0, "-", "aggregative"], [5, 0, 0, "-", "base"], [5, 0, 0, "-", "meta"], [5, 0, 0, "-", "non_aggregative"]], "quapy.method._kdey": [[5, 1, 1, "", "KDEBase"], [5, 1, 1, "", "KDEyCS"], [5, 1, 1, "", "KDEyHD"], [5, 1, 1, "", "KDEyML"]], "quapy.method._kdey.KDEBase": [[5, 4, 1, "", "BANDWIDTH_METHOD"], [5, 3, 1, "", "get_kde_function"], [5, 3, 1, "", "get_mixture_components"], [5, 3, 1, "", "pdf"]], "quapy.method._kdey.KDEyCS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", 
"gram_matrix_mix_sum"]], "quapy.method._kdey.KDEyHD": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._kdey.KDEyML": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method._neural": [[5, 1, 1, "", "QuaNetModule"], [5, 1, 1, "", "QuaNetTrainer"], [5, 5, 1, "", "mae_loss"]], "quapy.method._neural.QuaNetModule": [[5, 2, 1, "", "device"], [5, 3, 1, "", "forward"], [5, 4, 1, "", "training"]], "quapy.method._neural.QuaNetTrainer": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "clean_checkpoint"], [5, 3, 1, "", "clean_checkpoint_dir"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method._threshold_optim": [[5, 1, 1, "", "MAX"], [5, 1, 1, "", "MS"], [5, 1, 1, "", "MS2"], [5, 1, 1, "", "T50"], [5, 1, 1, "", "ThresholdOptimization"], [5, 1, 1, "", "X"]], "quapy.method._threshold_optim.MAX": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.MS2": [[5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.T50": [[5, 3, 1, "", "condition"]], "quapy.method._threshold_optim.ThresholdOptimization": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregate_with_threshold"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "condition"], [5, 3, 1, "", "discard"]], "quapy.method._threshold_optim.X": [[5, 3, 1, "", "condition"]], "quapy.method.aggregative": [[5, 1, 1, "", "ACC"], [5, 4, 1, "", "AdjustedClassifyAndCount"], [5, 1, 1, "", "AggregativeCrispQuantifier"], [5, 1, 1, "", "AggregativeMedianEstimator"], [5, 1, 1, "", "AggregativeQuantifier"], [5, 1, 1, "", "AggregativeSoftQuantifier"], [5, 1, 1, "", "BayesianCC"], [5, 1, 1, "", "BinaryAggregativeQuantifier"], [5, 1, 1, "", "CC"], [5, 4, 1, "", "ClassifyAndCount"], [5, 1, 1, "", "DMy"], [5, 4, 1, "", "DistributionMatchingY"], [5, 1, 1, "", "DyS"], [5, 1, 1, "", "EMQ"], [5, 4, 1, "", "ExpectationMaximizationQuantifier"], [5, 1, 1, "", "HDy"], [5, 4, 1, "", "HellingerDistanceY"], [5, 1, 1, "", "OneVsAllAggregative"], [5, 1, 1, "", "PACC"], [5, 1, 1, "", "PCC"], [5, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [5, 4, 1, "", "ProbabilisticClassifyAndCount"], [5, 4, 1, "", "SLD"], [5, 1, 1, "", "SMM"], [5, 5, 1, "", "newELM"], [5, 5, 1, "", "newSVMAE"], [5, 5, 1, "", "newSVMKLD"], [5, 5, 1, "", "newSVMQ"], [5, 5, 1, "", "newSVMRAE"]], "quapy.method.aggregative.ACC": [[5, 4, 1, "", "METHODS"], [5, 4, 1, "", "NORMALIZATIONS"], [5, 4, 1, "", "SOLVERS"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"], [5, 3, 1, "", "newInvariantRatioEstimation"]], "quapy.method.aggregative.AggregativeMedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.aggregative.AggregativeQuantifier": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 2, 1, "", "classes_"], [5, 2, 1, "", "classifier"], [5, 3, 1, "", "classifier_fit_predict"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 2, 1, "", "val_split"], [5, 4, 1, "", "val_split_"]], "quapy.method.aggregative.BayesianCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "get_conditional_probability_samples"], [5, 3, 1, "", "get_prevalence_samples"], [5, 3, 1, "", "sample_from_posterior"]], "quapy.method.aggregative.BinaryAggregativeQuantifier": [[5, 3, 1, "", "fit"], [5, 
2, 1, "", "neg_label"], [5, 2, 1, "", "pos_label"]], "quapy.method.aggregative.CC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DMy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.DyS": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.EMQ": [[5, 3, 1, "", "EM"], [5, 3, 1, "", "EMQ_BCTS"], [5, 4, 1, "", "EPSILON"], [5, 4, 1, "", "MAX_ITER"], [5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "classify"], [5, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"], [5, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.aggregative.SMM": [[5, 3, 1, "", "aggregate"], [5, 3, 1, "", "aggregation_fit"]], "quapy.method.base": [[5, 1, 1, "", "BaseQuantifier"], [5, 1, 1, "", "BinaryQuantifier"], [5, 1, 1, "", "OneVsAll"], [5, 1, 1, "", "OneVsAllGeneric"], [5, 5, 1, "", "newOneVsAll"]], "quapy.method.base.BaseQuantifier": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[5, 2, 1, "", "classes_"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.meta": [[5, 5, 1, "", "EACC"], [5, 5, 1, "", "ECC"], [5, 5, 1, "", "EEMQ"], [5, 5, 1, "", "EHDy"], [5, 5, 1, "", "EPACC"], [5, 1, 1, "", "Ensemble"], [5, 1, 1, "", "MedianEstimator"], [5, 1, 1, "", "MedianEstimator2"], [5, 5, 1, "", "ensembleFactory"], [5, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[5, 4, 1, "", "VALID_POLICIES"], [5, 2, 1, "", "aggregative"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 2, 1, "", "probabilistic"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.meta.MedianEstimator2": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "get_params"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[5, 1, 1, "", "DMx"], [5, 4, 1, "", "DistributionMatchingX"], [5, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"], [5, 1, 1, "", "ReadMe"]], "quapy.method.non_aggregative.DMx": [[5, 3, 1, "", "HDx"], [5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"]], "quapy.method.non_aggregative.ReadMe": [[5, 3, 1, "", "fit"], [5, 3, 1, "", "quantify"], [5, 3, 1, "", "std_constrained_linear_ls"]], "quapy.model_selection": [[2, 1, 1, "", "ConfigStatus"], [2, 1, 1, "", "GridSearchQ"], [2, 1, 1, "", "Status"], [2, 5, 1, "", "cross_val_predict"], [2, 5, 1, "", "expand_grid"], [2, 5, 1, "", "group_params"]], "quapy.model_selection.ConfigStatus": [[2, 3, 1, "", "failed"], [2, 3, 1, "", "success"]], "quapy.model_selection.GridSearchQ": [[2, 3, 1, "", "best_model"], [2, 3, 1, "", "fit"], [2, 3, 1, "", "get_params"], [2, 3, 1, "", "quantify"], [2, 3, 1, "", "set_params"]], "quapy.model_selection.Status": [[2, 4, 1, "", "ERROR"], [2, 4, 1, "", "INVALID"], [2, 4, 1, "", "SUCCESS"], [2, 4, 1, "", "TIMEOUT"]], "quapy.plot": [[2, 5, 1, "", "binary_bias_bins"], [2, 
5, 1, "", "binary_bias_global"], [2, 5, 1, "", "binary_diagonal"], [2, 5, 1, "", "brokenbar_supremacy_by_drift"], [2, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[2, 1, 1, "", "APP"], [2, 1, 1, "", "AbstractProtocol"], [2, 1, 1, "", "AbstractStochasticSeededProtocol"], [2, 4, 1, "", "ArtificialPrevalenceProtocol"], [2, 1, 1, "", "DomainMixer"], [2, 1, 1, "", "IterateProtocol"], [2, 1, 1, "", "NPP"], [2, 4, 1, "", "NaturalPrevalenceProtocol"], [2, 1, 1, "", "OnLabelledCollectionProtocol"], [2, 1, 1, "", "UPP"], [2, 4, 1, "", "UniformPrevalenceProtocol"]], "quapy.protocol.APP": [[2, 3, 1, "", "prevalence_grid"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[2, 3, 1, "", "collator"], [2, 2, 1, "", "random_state"], [2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.IterateProtocol": [[2, 3, 1, "", "total"]], "quapy.protocol.NPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[2, 4, 1, "", "RETURN_TYPES"], [2, 3, 1, "", "get_collator"], [2, 3, 1, "", "get_labelled_collection"], [2, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.UPP": [[2, 3, 1, "", "sample"], [2, 3, 1, "", "samples_parameters"], [2, 3, 1, "", "total"]], "quapy.util": [[2, 1, 1, "", "EarlyStop"], [2, 5, 1, "", "create_if_not_exist"], [2, 5, 1, "", "create_parent_dir"], [2, 5, 1, "", "download_file"], [2, 5, 1, "", "download_file_if_not_exists"], [2, 5, 1, "", "get_quapy_home"], [2, 5, 1, "", "map_parallel"], [2, 5, 1, "", "parallel"], [2, 5, 1, "", "parallel_unpack"], [2, 5, 1, "", "pickled_resource"], [2, 5, 1, "", "save_text_file"], [2, 5, 1, "", "temp_seed"], [2, 5, 1, "", "timeout"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"welcom": 0, "quapi": [0, 1, 2, 3, 4, 5], "": 0, "document": 0, "instal": 0, "github": 0, "content": [0, 2, 3, 4, 5], "indic": 0, "tabl": 0, "packag": [2, 3, 4, 5], "subpackag": 2, "submodul": [2, 3, 4, 5], "error": 2, "modul": [2, 3, 4, 5], "evalu": 2, "function": 2, "model_select": 2, "plot": 2, "protocol": 2, "util": 2, "classif": 3, "calibr": 3, "method": [3, 5], "neural": 3, "svmperf": 3, "data": 4, "base": [4, 5], "dataset": 4, "preprocess": 4, "reader": 4, "aggreg": 5, "meta": 5, "non_aggreg": 5}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Welcome to QuaPy\u2019s documentation!": [[0, "welcome-to-quapy-s-documentation"]], "Installation": [[0, "installation"]], "GitHub": [[0, "github"]], "Contents": [[0, "contents"]], "Indices and tables": [[0, "indices-and-tables"]], "quapy": [[1, "quapy"]], "Submodules": [[3, "submodules"], [2, 
"submodules"], [4, "submodules"], [5, "submodules"]], "Module contents": [[3, "module-quapy.classification"], [2, "module-quapy"], [4, "module-quapy.data"], [5, "module-quapy.method"]], "quapy.classification package": [[3, "quapy-classification-package"]], "quapy.classification.calibration module": [[3, "module-quapy.classification.calibration"]], "quapy.classification.methods module": [[3, "module-quapy.classification.methods"]], "quapy.classification.neural module": [[3, "module-quapy.classification.neural"]], "quapy.classification.svmperf module": [[3, "module-quapy.classification.svmperf"]], "quapy package": [[2, "quapy-package"]], "Subpackages": [[2, "subpackages"]], "quapy.error module": [[2, "module-quapy.error"]], "quapy.evaluation module": [[2, "module-quapy.evaluation"]], "quapy.functional module": [[2, "module-quapy.functional"]], "quapy.model_selection module": [[2, "module-quapy.model_selection"]], "quapy.plot module": [[2, "module-quapy.plot"]], "quapy.protocol module": [[2, "module-quapy.protocol"]], "quapy.util module": [[2, "module-quapy.util"]], "quapy.data package": [[4, "quapy-data-package"]], "quapy.data.base module": [[4, "module-quapy.data.base"]], "quapy.data.datasets module": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing module": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader module": [[4, "module-quapy.data.reader"]], "quapy.method package": [[5, "quapy-method-package"]], "quapy.method.aggregative module": [[5, "module-quapy.method.aggregative"]], "quapy.method.base module": [[5, "module-quapy.method.base"]], "quapy.method.meta module": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative module": [[5, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[2, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol"]], "artificialprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.ArtificialPrevalenceProtocol"]], "configstatus (class in quapy.model_selection)": [[2, "quapy.model_selection.ConfigStatus"]], "domainmixer (class in quapy.protocol)": [[2, "quapy.protocol.DomainMixer"]], "error (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.ERROR"]], "earlystop (class in quapy.util)": [[2, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[2, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[2, "quapy.functional.HellingerDistance"]], "invalid (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.INVALID"]], "iterateprotocol (class in quapy.protocol)": [[2, "quapy.protocol.IterateProtocol"]], "npp (class in quapy.protocol)": [[2, "quapy.protocol.NPP"]], "naturalprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.NaturalPrevalenceProtocol"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[2, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "success (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.SUCCESS"]], "status (class in quapy.model_selection)": [[2, "quapy.model_selection.Status"]], "timeout (quapy.model_selection.status attribute)": [[2, "quapy.model_selection.Status.TIMEOUT"]], 
"topsoedistance() (in module quapy.functional)": [[2, "quapy.functional.TopsoeDistance"]], "upp (class in quapy.protocol)": [[2, "quapy.protocol.UPP"]], "uniformprevalenceprotocol (in module quapy.protocol)": [[2, "quapy.protocol.UniformPrevalenceProtocol"]], "absolute_error() (in module quapy.error)": [[2, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[2, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[2, "quapy.error.acce"]], "ae() (in module quapy.error)": [[2, "quapy.error.ae"]], "argmin_prevalence() (in module quapy.functional)": [[2, "quapy.functional.argmin_prevalence"]], "as_binary_prevalence() (in module quapy.functional)": [[2, "quapy.functional.as_binary_prevalence"]], "best_model() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[2, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[2, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[2, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[2, "quapy.functional.check_prevalence_vector"]], "clip() (in module quapy.functional)": [[2, "quapy.functional.clip"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "condsoftmax() (in module quapy.functional)": [[2, "quapy.functional.condsoftmax"]], "counts_from_labels() (in module quapy.functional)": [[2, "quapy.functional.counts_from_labels"]], "create_if_not_exist() (in module quapy.util)": [[2, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[2, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[2, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[2, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[2, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[2, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate"]], "evaluate_on_samples() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluate_on_samples"]], "evaluation_report() (in module quapy.evaluation)": [[2, "quapy.evaluation.evaluation_report"]], "expand_grid() (in module quapy.model_selection)": [[2, "quapy.model_selection.expand_grid"]], "f1_error() (in module quapy.error)": [[2, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[2, "quapy.error.f1e"]], "failed() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.failed"]], "fit() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[2, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_divergence() (in module quapy.functional)": [[2, "quapy.functional.get_divergence"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[2, "quapy.functional.get_nprevpoints_approximation"]], "get_params() 
(quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[2, "quapy.util.get_quapy_home"]], "group_params() (in module quapy.model_selection)": [[2, "quapy.model_selection.group_params"]], "kld() (in module quapy.error)": [[2, "quapy.error.kld"]], "l1_norm() (in module quapy.functional)": [[2, "quapy.functional.l1_norm"]], "linear_search() (in module quapy.functional)": [[2, "quapy.functional.linear_search"]], "mae() (in module quapy.error)": [[2, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[2, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_absolute_error"]], "mean_normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_absolute_error"]], "mean_normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_normalized_relative_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[2, "quapy.error.mkld"]], "mnae() (in module quapy.error)": [[2, "quapy.error.mnae"]], "mnkld() (in module quapy.error)": [[2, "quapy.error.mnkld"]], "mnrae() (in module quapy.error)": [[2, "quapy.error.mnrae"]], "module": [[2, "module-quapy"], [2, "module-quapy.error"], [2, "module-quapy.evaluation"], [2, "module-quapy.functional"], [2, "module-quapy.model_selection"], [2, "module-quapy.plot"], [2, "module-quapy.protocol"], [2, "module-quapy.util"], [4, "module-quapy.data"], [4, "module-quapy.data.base"], [4, "module-quapy.data.datasets"], [4, "module-quapy.data.preprocessing"], [4, "module-quapy.data.reader"], [5, "module-quapy.method"], [5, "module-quapy.method._kdey"], [5, "module-quapy.method._neural"], [5, "module-quapy.method._threshold_optim"], [5, "module-quapy.method.aggregative"], [5, "module-quapy.method.base"], [5, "module-quapy.method.meta"], [5, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[2, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[2, "quapy.error.mse"]], "nae() (in module quapy.error)": [[2, "quapy.error.nae"]], "nkld() (in module quapy.error)": [[2, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[2, "quapy.functional.normalize_prevalence"]], "normalized_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_absolute_error"]], "normalized_relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.normalized_relative_absolute_error"]], "nrae() (in module quapy.error)": [[2, "quapy.error.nrae"]], "num_prevalence_combinations() (in module quapy.functional)": [[2, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[2, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "optim_minimize() (in module quapy.functional)": [[2, "quapy.functional.optim_minimize"]], "parallel() (in module quapy.util)": [[2, "quapy.util.parallel"]], "parallel_unpack() (in module quapy.util)": [[2, "quapy.util.parallel_unpack"]], "pickled_resource() (in module quapy.util)": [[2, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[2, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[2, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[2, 
"quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[2, "quapy.functional.prevalence_linspace"]], "projection_simplex_sort() (in module quapy.functional)": [[2, "quapy.functional.projection_simplex_sort"]], "quantify() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[2, "module-quapy"]], "quapy.error": [[2, "module-quapy.error"]], "quapy.evaluation": [[2, "module-quapy.evaluation"]], "quapy.functional": [[2, "module-quapy.functional"]], "quapy.model_selection": [[2, "module-quapy.model_selection"]], "quapy.plot": [[2, "module-quapy.plot"]], "quapy.protocol": [[2, "module-quapy.protocol"]], "quapy.util": [[2, "module-quapy.util"]], "rae() (in module quapy.error)": [[2, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[2, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[2, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[2, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[2, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[2, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[2, "quapy.error.smooth"]], "softmax() (in module quapy.functional)": [[2, "quapy.functional.softmax"]], "solve_adjustment() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment"]], "solve_adjustment_binary() (in module quapy.functional)": [[2, "quapy.functional.solve_adjustment_binary"]], "strprev() (in module quapy.functional)": [[2, "quapy.functional.strprev"]], "success() (quapy.model_selection.configstatus method)": [[2, "quapy.model_selection.ConfigStatus.success"]], "temp_seed() (in module quapy.util)": [[2, "quapy.util.temp_seed"]], "ternary_search() (in module quapy.functional)": [[2, "quapy.functional.ternary_search"]], "timeout() (in module quapy.util)": [[2, "quapy.util.timeout"]], "total() (quapy.protocol.app method)": [[2, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[2, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[2, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.iterateprotocol method)": [[2, "quapy.protocol.IterateProtocol.total"]], 
"total() (quapy.protocol.npp method)": [[2, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.upp method)": [[2, "quapy.protocol.UPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[2, "quapy.functional.uniform_simplex_sampling"]], "dataset (class in quapy.data.base)": [[4, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[4, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[4, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.counts"]], "fetch_ifcb() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_IFCB"]], "fetch_ucibinarydataset() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIBinaryDataset"]], "fetch_ucibinarylabelledcollection() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIBinaryLabelledCollection"]], "fetch_ucimulticlassdataset() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIMulticlassDataset"]], "fetch_ucimulticlasslabelledcollection() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_UCIMulticlassLabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[4, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.index"]], "join() (quapy.data.base.labelledcollection class method)": [[4, "quapy.data.base.LabelledCollection.join"]], "kfcv() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[4, "quapy.data.base.Dataset.load"]], "load() 
(quapy.data.base.labelledcollection class method)": [[4, "quapy.data.base.LabelledCollection.load"]], "n_classes (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[4, "module-quapy.data"]], "quapy.data.base": [[4, "module-quapy.data.base"]], "quapy.data.datasets": [[4, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[4, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[4, "module-quapy.data.reader"]], "reduce() (quapy.data.base.dataset method)": [[4, "quapy.data.base.Dataset.reduce"]], "reduce_columns() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[4, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[4, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[4, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[4, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[4, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[4, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[4, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[4, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativecrispquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeCrispQuantifier"]], "aggregativemedianestimator (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator"]], "aggregativequantifier (class in quapy.method.aggregative)": [[5, 
"quapy.method.aggregative.AggregativeQuantifier"]], "aggregativesoftquantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.AggregativeSoftQuantifier"]], "bandwidth_method (quapy.method._kdey.kdebase attribute)": [[5, "quapy.method._kdey.KDEBase.BANDWIDTH_METHOD"]], "basequantifier (class in quapy.method.base)": [[5, "quapy.method.base.BaseQuantifier"]], "bayesiancc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BayesianCC"]], "binaryaggregativequantifier (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[5, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ClassifyAndCount"]], "dmx (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DMx"]], "dmy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DMy"]], "distributionmatchingx (in module quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.DistributionMatchingX"]], "distributionmatchingy (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.DistributionMatchingY"]], "dys (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[5, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[5, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[5, "quapy.method.meta.EHDy"]], "em() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.EMQ"]], "emq_bcts() (quapy.method.aggregative.emq class method)": [[5, "quapy.method.aggregative.EMQ.EMQ_BCTS"]], "epacc() (in module quapy.method.meta)": [[5, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[5, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "hdx() (quapy.method.non_aggregative.dmx class method)": [[5, "quapy.method.non_aggregative.DMx.HDx"]], "hdy (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.HellingerDistanceY"]], "kdebase (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEBase"]], "kdeycs (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyCS"]], "kdeyhd (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyHD"]], "kdeyml (class in quapy.method._kdey)": [[5, "quapy.method._kdey.KDEyML"]], "max (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[5, "quapy.method.aggregative.EMQ.MAX_ITER"]], "methods (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.METHODS"]], "ms (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS"]], "ms2 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[5, 
"quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "medianestimator (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator"]], "medianestimator2 (class in quapy.method.meta)": [[5, "quapy.method.meta.MedianEstimator2"]], "normalizations (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.NORMALIZATIONS"]], "onevsall (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[5, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetModule"]], "quanettrainer (class in quapy.method._neural)": [[5, "quapy.method._neural.QuaNetTrainer"]], "readme (class in quapy.method.non_aggregative)": [[5, "quapy.method.non_aggregative.ReadMe"]], "sld (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[5, "quapy.method.aggregative.SMM"]], "solvers (quapy.method.aggregative.acc attribute)": [[5, "quapy.method.aggregative.ACC.SOLVERS"]], "t50 (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.T50"]], "thresholdoptimization (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[5, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method._threshold_optim)": [[5, "quapy.method._threshold_optim.X"]], "aggregate() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregate"]], "aggregate() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregate"]], "aggregate() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregate"]], "aggregate() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregate"]], "aggregate() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate"]], "aggregate() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[5, 
"quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregate"]], "aggregate_with_threshold() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold"]], "aggregation_fit() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyhd method)": [[5, "quapy.method._kdey.KDEyHD.aggregation_fit"]], "aggregation_fit() (quapy.method._kdey.kdeyml method)": [[5, "quapy.method._kdey.KDEyML.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.aggregation_fit"]], "aggregation_fit() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.acc method)": [[5, "quapy.method.aggregative.ACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.cc method)": [[5, "quapy.method.aggregative.CC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dmy method)": [[5, "quapy.method.aggregative.DMy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.dys method)": [[5, "quapy.method.aggregative.DyS.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.hdy method)": [[5, "quapy.method.aggregative.HDy.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pacc method)": [[5, "quapy.method.aggregative.PACC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.pcc method)": [[5, "quapy.method.aggregative.PCC.aggregation_fit"]], "aggregation_fit() (quapy.method.aggregative.smm method)": [[5, "quapy.method.aggregative.SMM.aggregation_fit"]], "aggregative (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method._neural.quanettrainer property)": [[5, "quapy.method._neural.QuaNetTrainer.classes_"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[5, "quapy.method.base.OneVsAllGeneric.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classifier_fit_predict() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classifier_fit_predict"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[5, 
"quapy.method.aggregative.OneVsAllAggregative.classify"]], "clean_checkpoint() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir"]], "condition() (quapy.method._threshold_optim.max method)": [[5, "quapy.method._threshold_optim.MAX.condition"]], "condition() (quapy.method._threshold_optim.ms method)": [[5, "quapy.method._threshold_optim.MS.condition"]], "condition() (quapy.method._threshold_optim.t50 method)": [[5, "quapy.method._threshold_optim.T50.condition"]], "condition() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.condition"]], "condition() (quapy.method._threshold_optim.x method)": [[5, "quapy.method._threshold_optim.X.condition"]], "device (quapy.method._neural.quanetmodule property)": [[5, "quapy.method._neural.QuaNetModule.device"]], "discard() (quapy.method._threshold_optim.ms2 method)": [[5, "quapy.method._threshold_optim.MS2.discard"]], "discard() (quapy.method._threshold_optim.thresholdoptimization method)": [[5, "quapy.method._threshold_optim.ThresholdOptimization.discard"]], "ensemblefactory() (in module quapy.method.meta)": [[5, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.fit"]], "fit() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.binaryaggregativequantifier method)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.fit"]], "fit() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.fit"]], "fit() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.fit"]], "fit() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "fit() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.fit"]], "forward() (quapy.method._neural.quanetmodule method)": [[5, "quapy.method._neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[5, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_conditional_probability_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_conditional_probability_samples"]], "get_kde_function() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_kde_function"]], "get_mixture_components() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.get_mixture_components"]], "get_params() (quapy.method._neural.quanettrainer method)": [[5, 
"quapy.method._neural.QuaNetTrainer.get_params"]], "get_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.get_params"]], "get_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.get_params"]], "get_prevalence_samples() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.get_prevalence_samples"]], "get_probability_distribution() (in module quapy.method.meta)": [[5, "quapy.method.meta.get_probability_distribution"]], "gram_matrix_mix_sum() (quapy.method._kdey.kdeycs method)": [[5, "quapy.method._kdey.KDEyCS.gram_matrix_mix_sum"]], "mae_loss() (in module quapy.method._neural)": [[5, "quapy.method._neural.mae_loss"]], "neg_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.neg_label"]], "newelm() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newELM"]], "newinvariantratioestimation() (quapy.method.aggregative.acc class method)": [[5, "quapy.method.aggregative.ACC.newInvariantRatioEstimation"]], "newonevsall() (in module quapy.method.base)": [[5, "quapy.method.base.newOneVsAll"]], "newsvmae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMAE"]], "newsvmkld() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMKLD"]], "newsvmq() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMQ"]], "newsvmrae() (in module quapy.method.aggregative)": [[5, "quapy.method.aggregative.newSVMRAE"]], "pdf() (quapy.method._kdey.kdebase method)": [[5, "quapy.method._kdey.KDEBase.pdf"]], "pos_label (quapy.method.aggregative.binaryaggregativequantifier property)": [[5, "quapy.method.aggregative.BinaryAggregativeQuantifier.pos_label"]], "predict_proba() (quapy.method.aggregative.emq method)": [[5, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[5, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.quantify"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[5, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[5, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[5, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.quantify"]], "quantify() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.quantify"]], "quantify() (quapy.method.non_aggregative.dmx method)": [[5, "quapy.method.non_aggregative.DMx.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[5, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quantify() 
(quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.quantify"]], "quapy.method": [[5, "module-quapy.method"]], "quapy.method._kdey": [[5, "module-quapy.method._kdey"]], "quapy.method._neural": [[5, "module-quapy.method._neural"]], "quapy.method._threshold_optim": [[5, "module-quapy.method._threshold_optim"]], "quapy.method.aggregative": [[5, "module-quapy.method.aggregative"]], "quapy.method.base": [[5, "module-quapy.method.base"]], "quapy.method.meta": [[5, "module-quapy.method.meta"]], "quapy.method.non_aggregative": [[5, "module-quapy.method.non_aggregative"]], "sample_from_posterior() (quapy.method.aggregative.bayesiancc method)": [[5, "quapy.method.aggregative.BayesianCC.sample_from_posterior"]], "set_params() (quapy.method._neural.quanettrainer method)": [[5, "quapy.method._neural.QuaNetTrainer.set_params"]], "set_params() (quapy.method.aggregative.aggregativemedianestimator method)": [[5, "quapy.method.aggregative.AggregativeMedianEstimator.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[5, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.meta.medianestimator method)": [[5, "quapy.method.meta.MedianEstimator.set_params"]], "set_params() (quapy.method.meta.medianestimator2 method)": [[5, "quapy.method.meta.MedianEstimator2.set_params"]], "std_constrained_linear_ls() (quapy.method.non_aggregative.readme method)": [[5, "quapy.method.non_aggregative.ReadMe.std_constrained_linear_ls"]], "training (quapy.method._neural.quanetmodule attribute)": [[5, "quapy.method._neural.QuaNetModule.training"]], "val_split (quapy.method.aggregative.aggregativequantifier property)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split"]], "val_split_ (quapy.method.aggregative.aggregativequantifier attribute)": [[5, "quapy.method.aggregative.AggregativeQuantifier.val_split_"]]}}) \ No newline at end of file diff --git a/quapy/functional.py b/quapy/functional.py index 677715b..9c5c16b 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -8,36 +8,19 @@ import scipy import numpy as np -def prevalence_linspace(grid_points:int=21, repeats:int=1, smooth_limits_epsilon:float=0.01): - """ - Produces an array of uniformly separated values of prevalence. - By default, produces an array of 21 prevalence values, with - step 0.05 and with the limits smoothed, i.e.: - [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99] +# ------------------------------------------------------------------------------------------ +# Counter utils +# ------------------------------------------------------------------------------------------ - :param grid_points: the number of prevalence values to sample from the [0,1] interval (default 21) - :param repeats: number of times each prevalence is to be repeated (defaults to 1) - :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 - :return: an array of uniformly separated prevalence values +def counts_from_labels(labels: ArrayLike, classes: ArrayLike) -> np.ndarray: """ - p = np.linspace(0., 1., num=grid_points, endpoint=True) - p[0] += smooth_limits_epsilon - p[-1] -= smooth_limits_epsilon - if p[0] > p[1]: - raise ValueError(f'the smoothing in the limits is greater than the prevalence step') - if repeats > 1: - p = np.repeat(p, repeats) - return p - - -def counts_from_labels(labels: ArrayLike, classes: ArrayLike): - """ - Computes the count values from a vector of labels. + Computes the raw count values from a vector of labels. 
:param labels: array-like of shape `(n_instances,)` with the label for each instance :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples. - :return: an ndarray of shape `(len(classes),)` with the occurrence counts of each class + :return: ndarray of shape `(len(classes),)` with the raw counts for each class, in the same order + as they appear in `classes` """ if np.asarray(labels).ndim != 1: raise ValueError(f'param labels does not seem to be a ndarray of label predictions') @@ -54,10 +37,12 @@ def prevalence_from_labels(labels: ArrayLike, classes: ArrayLike): :param labels: array-like of shape `(n_instances,)` with the label for each instance :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when some classes have no examples. - :return: an ndarray of shape `(len(classes))` with the class prevalence values + :return: ndarray of shape `(len(classes),)` with the class proportions for each class, in the same order + as they appear in `classes` """ - counts = np.array(counts_from_labels(labels, classes), dtype=float) - return counts / np.sum(counts) + counts = counts_from_labels(labels, classes) + prevalences = counts.astype(float) / np.sum(counts) + return prevalences def prevalence_from_probabilities(posteriors: ArrayLike, binarize: bool = False): @@ -71,7 +56,7 @@ def prevalence_from_probabilities(posteriors: ArrayLike, binarize: bool = False) """ posteriors = np.asarray(posteriors) if posteriors.ndim != 2: - raise ValueError(f'param posteriors does not seem to be a ndarray of posteior probabilities') + raise ValueError(f'param posteriors does not seem to be a ndarray of posterior probabilities') if binarize: predictions = np.argmax(posteriors, axis=-1) return prevalence_from_labels(predictions, np.arange(posteriors.shape[1])) @@ -81,23 +66,262 @@ def prevalence_from_probabilities(posteriors: ArrayLike, binarize: bool = False) return prevalences -def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary: bool=False): +def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1) -> int: + """ + Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally + distant prevalence values are generated and `n_repeats` repetitions are requested. + The computation comes down to calculating: + + .. math:: + \\binom{N+C-1}{C-1} \\times r + + where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of + classes, and `r` is `n_repeats`. This solution comes from the + `Stars and Bars `_ problem. + + :param int n_classes: number of classes + :param int n_prevpoints: number of prevalence points. + :param int n_repeats: number of repetitions for each prevalence combination + :return: The number of possible combinations. 
For example, if `n_classes`=2, `n_prevpoints`=5, `n_repeats`=1, + then the number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], + and [1.0,0.0] + """ + N = n_prevpoints-1 + C = n_classes + r = n_repeats + return int(scipy.special.binom(N + C - 1, C - 1) * r) + + +def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repeats:int=1) -> int: + """ + Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so + that the number of valid prevalence values generated as combinations of prevalence points (points in a + `n_classes`-dimensional simplex) do not exceed combinations_budget. + + :param int combinations_budget: maximum number of combinations allowed + :param int n_classes: number of classes + :param int n_repeats: number of repetitions for each prevalence combination + :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences + """ + assert n_classes > 0 and n_repeats > 0 and combinations_budget > 0, 'parameters must be positive integers' + n_prevpoints = 1 + while True: + combinations = num_prevalence_combinations(n_prevpoints, n_classes, n_repeats) + if combinations > combinations_budget: + return n_prevpoints-1 + else: + n_prevpoints += 1 + + +# ------------------------------------------------------------------------------------------ +# Prevalence vectors +# ------------------------------------------------------------------------------------------ + +def as_binary_prevalence(positive_prevalence: Union[float, ArrayLike], clip_if_necessary: bool=False) -> np.ndarray: """ Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution. - :param positive_prevalence: prevalence for the positive class - :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution + :param positive_prevalence: float or array-like of floats with the prevalence for the positive class + :param bool clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution is valid. If False, it then checks that the value is in the valid range, and raises an error if not. :return: np.ndarray of shape `(2,)` """ + positive_prevalence = np.asarray(positive_prevalence, float) if clip_if_necessary: positive_prevalence = np.clip(positive_prevalence, 0, 1) else: - assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class' + assert np.logical_and(0 <= positive_prevalence, positive_prevalence <= 1).all(), \ + 'the value provided is not a valid prevalence for the positive class' return np.asarray([1-positive_prevalence, positive_prevalence]).T +def strprev(prevalences: ArrayLike, prec: int=3) -> str: + """ + Returns a string representation for a prevalence vector. E.g., + + >>> strprev([1/3, 2/3], prec=2) + >>> '[0.33, 0.67]' + + :param prevalences: array-like of prevalence values + :param prec: int, indicates the float precision (number of decimal values to print) + :return: string + """ + return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']' + + +def check_prevalence_vector(prevalences: ArrayLike, raise_exception: bool=False, tolerance: float=1e-08, aggr=True): + """ + Checks that `prevalences` is a valid prevalence vector, i.e., it contains values in [0,1] and + the values sum up to 1. 
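Editor's note: the stars-and-bars count introduced above can be sanity-checked against a brute-force enumeration. The snippet below is an illustrative sketch, not part of the patch; it assumes this branch is installed and that `quapy.functional` is importable.

```python
import itertools
import quapy.functional as F

# brute-force: 2 classes, 5 equidistant grid points in [0, 1]
grid = [i / 4 for i in range(5)]
valid = [c for c in itertools.product(grid, repeat=2) if abs(sum(c) - 1.0) < 1e-9]

# binom(4+1, 1) * 1 = 5, matching the enumeration given in the docstring above
assert F.num_prevalence_combinations(n_prevpoints=5, n_classes=2, n_repeats=1) == len(valid) == 5

# largest grid whose combination count stays within a budget (here, 3 classes)
print(F.get_nprevpoints_approximation(combinations_budget=100, n_classes=3))
```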
In other words, verifies that the `prevalences` vectors lies in the + probability simplex. + + :param ArrayLike prevalences: the prevalence vector, or vectors, to check + :param bool raise_exception: whether to raise an exception if the vector (or any of the vectors) does + not lie in the simplex (default False) + :param float tolerance: error tolerance for the check `sum(prevalences) - 1 = 0` + :param bool aggr: if True (default) returns one single bool (True if all prevalence vectors are valid, + False otherwise), if False returns an array of bool, one for each prevalence vector + :return: a single bool True if `prevalences` is a vector of prevalence values that lies on the simplex, + or False otherwise; alternatively, if `prevalences` is a matrix of shape `(num_vectors, n_classes,)` + then it returns one such bool for each prevalence vector + """ + prevalences = np.asarray(prevalences) + + all_positive = prevalences>=0 + if not all_positive.all(): + if raise_exception: + raise ValueError('some prevalence vectors contain negative numbers; ' + 'consider using the qp.functional.normalize_prevalence with ' + 'any method from ["clip", "mapsimplex", "softmax"]') + + all_close_1 = np.isclose(prevalences.sum(axis=-1), 1, atol=tolerance) + if not all_close_1.all(): + if raise_exception: + raise ValueError('some prevalence vectors do not sum up to 1; ' + 'consider using the qp.functional.normalize_prevalence with ' + 'any method from ["l1", "clip", "mapsimplex", "softmax"]') + + valid = np.logical_and(all_positive.all(axis=-1), all_close_1) + if aggr: + return valid.all() + else: + return valid + + +def normalize_prevalence(prevalences: ArrayLike, method='l1'): + """ + Normalizes a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in + cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in + cases in which all values are zero. + + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :param str method: indicates the normalization method to employ, options are: + + * `l1`: applies L1 normalization (default); a 0 vector is mapped onto the uniform prevalence + * `clip`: clip values in [0,1] and then rescales so that the L1 norm is 1 + * `mapsimplex`: projects vectors onto the probability simplex. This implementation relies on + `Mathieu Blondel's projection_simplex_sort `_ + * `softmax`: applies softmax to all vectors + * `condsoftmax`: applies softmax only to invalid prevalence vectors + + :return: a normalized vector or matrix of prevalence values + """ + if method in ['none', None]: + return prevalences + + prevalences = np.asarray(prevalences, dtype=float) + + if method=='l1': + normalized = l1_norm(prevalences) + check_prevalence_vector(normalized, raise_exception=True) + elif method=='clip': + normalized = clip(prevalences) # no need to check afterwards + elif method=='mapsimplex': + normalized = projection_simplex_sort(prevalences) + elif method=='softmax': + normalized = softmax(prevalences) + elif method=='condsoftmax': + normalized = condsoftmax(prevalences) + else: + raise ValueError(f'unknown {method=}, valid ones are ["l1", "clip", "mapsimplex", "softmax"]') + + return normalized + + +def l1_norm(prevalences: ArrayLike) -> np.ndarray: + """ + Applies L1 normalization to the `unnormalized_arr` so that it becomes a valid prevalence + vector. Zero vectors are mapped onto the uniform distribution. 
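Editor's note: a minimal sketch of how the validity check and the new `normalize_prevalence` dispatch interact (same assumption: the patched `quapy.functional` module is importable).

```python
import numpy as np
import quapy.functional as F

p = np.asarray([0.2, 0.5, 0.4])                         # sums to 1.1: not a distribution
print(F.check_prevalence_vector(p))                     # False
print(F.normalize_prevalence(p, method='l1'))           # rescaled so the entries sum to 1
print(F.normalize_prevalence(p, method='condsoftmax'))  # softmax applied, since p is invalid

# matrix input: one validity flag per row when aggr=False
P = np.asarray([[0.5, 0.5], [0.7, 0.6]])
print(F.check_prevalence_vector(P, aggr=False))         # [ True False]
```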
Raises an exception if + the resulting vectors are not valid distributions. This may happen when the original + prevalence vectors contain negative values. Use the `clip` normalization function + instead to avoid this possibility. + + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :return: np.ndarray representing a valid distribution + """ + n_classes = prevalences.shape[-1] + accum = prevalences.sum(axis=-1, keepdims=True) + prevalences = np.true_divide(prevalences, accum, where=accum > 0) + allzeros = accum.flatten() == 0 + if any(allzeros): + if prevalences.ndim == 1: + prevalences = np.full(shape=n_classes, fill_value=1. / n_classes) + else: + prevalences[allzeros] = np.full(shape=n_classes, fill_value=1. / n_classes) + return prevalences + + +def clip(prevalences: ArrayLike) -> np.ndarray: + """ + Clips the values in [0,1] and then applies the L1 normalization. + + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :return: np.ndarray representing a valid distribution + """ + clipped = np.clip(prevalences, 0, 1) + normalized = l1_norm(clipped) + return normalized + + +def projection_simplex_sort(unnormalized_arr: ArrayLike) -> np.ndarray: + """Projects a point onto the probability simplex. + + The code is adapted from Mathieu Blondel's BSD-licensed + `implementation `_ + (see function `projection_simplex_sort` in their repo) which is accompanying the paper + + Mathieu Blondel, Akinori Fujino, and Naonori Ueda. + Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, + ICPR 2014, `URL `_ + + :param `unnormalized_arr`: point in n-dimensional space, shape `(n,)` + :return: projection of `unnormalized_arr` onto the (n-1)-dimensional probability simplex, shape `(n,)` + """ + unnormalized_arr = np.asarray(unnormalized_arr) + n = len(unnormalized_arr) + u = np.sort(unnormalized_arr)[::-1] + cssv = np.cumsum(u) - 1.0 + ind = np.arange(1, n + 1) + cond = u - cssv / ind > 0 + rho = ind[cond][-1] + theta = cssv[cond][-1] / float(rho) + return np.maximum(unnormalized_arr - theta, 0) + + +def softmax(prevalences: ArrayLike) -> np.ndarray: + """ + Applies the softmax function to all vectors even if the original vectors were valid distributions. + If you want to leave valid vectors untouched, use condsoftmax instead. + + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :return: np.ndarray representing a valid distribution + """ + normalized = scipy.special.softmax(prevalences, axis=-1) + return normalized + + +def condsoftmax(prevalences: ArrayLike) -> np.ndarray: + """ + Applies the softmax function only to vectors that do not represent valid distributions. 
+ + :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values + :return: np.ndarray representing a valid distribution + """ + invalid_idx = ~ check_prevalence_vector(prevalences, aggr=False, raise_exception=False) + if isinstance(invalid_idx, np.bool_) and invalid_idx: + # only one vector + normalized = scipy.special.softmax(prevalences) + else: + prevalences = np.copy(prevalences) + prevalences[invalid_idx] = scipy.special.softmax(prevalences[invalid_idx], axis=-1) + normalized = prevalences + return normalized + + +# ------------------------------------------------------------------------------------------ +# Divergences +# ------------------------------------------------------------------------------------------ def HellingerDistance(P: np.ndarray, Q: np.ndarray) -> float: """ @@ -128,188 +352,6 @@ def TopsoeDistance(P: np.ndarray, Q: np.ndarray, epsilon: float=1e-20): :return: float """ return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon))) - - -def uniform_prevalence_sampling(n_classes: int, size: int=1): - """ - Implements the `Kraemer algorithm `_ - for sampling uniformly at random from the unit simplex. This implementation is adapted from this - `post _`. - - :param n_classes: integer, number of classes (dimensionality of the simplex) - :param size: number of samples to return - :return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise - """ - if n_classes == 2: - u = np.random.rand(size) - u = np.vstack([1-u, u]).T - else: - u = np.random.rand(size, n_classes-1) - u.sort(axis=-1) - _0s = np.zeros(shape=(size, 1)) - _1s = np.ones(shape=(size, 1)) - a = np.hstack([_0s, u]) - b = np.hstack([u, _1s]) - u = b-a - if size == 1: - u = u.flatten() - return u - - -uniform_simplex_sampling = uniform_prevalence_sampling - - -def strprev(prevalences: ArrayLike, prec: int=3): - """ - Returns a string representation for a prevalence vector. E.g., - - >>> strprev([1/3, 2/3], prec=2) - >>> '[0.33, 0.67]' - - :param prevalences: a vector of prevalence values - :param prec: float precision - :return: string - """ - return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']' - - -def adjusted_quantification(prevalence_estim: ArrayLike, tpr: float, fpr: float, clip: bool=True): - """ - Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the - positive class `p` comes down to computing: - - .. math:: - ACC(p) = \\frac{ p - fpr }{ tpr - fpr } - - :param prevalence_estim: float, the estimated value for the positive class - :param tpr: float, the true positive rate of the classifier - :param fpr: float, the false positive rate of the classifier - :param clip: set to True (default) to clip values that might exceed the range [0,1] - :return: float, the adjusted count - """ - - den = tpr - fpr - if den == 0: - den += 1e-8 - adjusted = (prevalence_estim - fpr) / den - if clip: - adjusted = np.clip(adjusted, 0., 1.) - return adjusted - - -def normalize_prevalence(prevalences: ArrayLike): - """ - Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in - cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in - cases in which all values are zero. 
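Editor's note: the non-trivial normalizers defined above differ in how they repair an invalid vector; a short hedged sketch (same import assumption as before), with values computed by hand for the projection case.

```python
import numpy as np
import quapy.functional as F

v = np.asarray([1.2, -0.3, 0.4])              # invalid: negative entry, sums to 1.3
print(F.clip(v))                              # clip to [0, 1], then L1-rescale
print(F.projection_simplex_sort(v))           # Euclidean projection -> [0.9, 0. , 0.1]
print(F.condsoftmax(v))                       # softmax, because v is invalid
print(F.condsoftmax(np.asarray([0.3, 0.7])))  # already valid: returned unchanged
```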
- - :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values - :return: a normalized vector or matrix of prevalence values - """ - prevalences = np.asarray(prevalences) - n_classes = prevalences.shape[-1] - accum = prevalences.sum(axis=-1, keepdims=True) - prevalences = np.true_divide(prevalences, accum, where=accum>0) - allzeros = accum.flatten()==0 - if any(allzeros): - if prevalences.ndim == 1: - prevalences = np.full(shape=n_classes, fill_value=1./n_classes) - else: - prevalences[accum.flatten()==0] = np.full(shape=n_classes, fill_value=1./n_classes) - return prevalences - - -def __num_prevalence_combinations_depr(n_prevpoints:int, n_classes:int, n_repeats:int=1): - """ - Computes the number of prevalence combinations in the n_classes-dimensional simplex if `nprevpoints` equally distant - prevalence values are generated and `n_repeats` repetitions are requested. - - :param n_classes: integer, number of classes - :param n_prevpoints: integer, number of prevalence points. - :param n_repeats: integer, number of repetitions for each prevalence combination - :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the - number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] - """ - __cache={} - def __f(nc,np): - if (nc,np) in __cache: # cached result - return __cache[(nc,np)] - if nc==1: # stop condition - return 1 - else: # recursive call - x = sum([__f(nc-1, np-i) for i in range(np)]) - __cache[(nc,np)] = x - return x - return __f(n_classes, n_prevpoints) * n_repeats - - -def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1): - """ - Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally - distant prevalence values are generated and `n_repeats` repetitions are requested. - The computation comes down to calculating: - - .. math:: - \\binom{N+C-1}{C-1} \\times r - - where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of - classes, and `r` is `n_repeats`. This solution comes from the - `Stars and Bars `_ problem. - - :param n_classes: integer, number of classes - :param n_prevpoints: integer, number of prevalence points. - :param n_repeats: integer, number of repetitions for each prevalence combination - :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the - number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0] - """ - N = n_prevpoints-1 - C = n_classes - r = n_repeats - return int(scipy.special.binom(N + C - 1, C - 1) * r) - - -def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repeats:int=1): - """ - Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so - that the number of valid prevalence values generated as combinations of prevalence points (points in a - `n_classes`-dimensional simplex) do not exceed combinations_budget. 
- - :param combinations_budget: integer, maximum number of combinations allowed - :param n_classes: integer, number of classes - :param n_repeats: integer, number of repetitions for each prevalence combination - :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences - """ - assert n_classes > 0 and n_repeats > 0 and combinations_budget > 0, 'parameters must be positive integers' - n_prevpoints = 1 - while True: - combinations = num_prevalence_combinations(n_prevpoints, n_classes, n_repeats) - if combinations > combinations_budget: - return n_prevpoints-1 - else: - n_prevpoints += 1 - - -def check_prevalence_vector(prevalences: ArrayLike, raise_exception: bool=False, toleranze: float=1e-08): - """ - Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1. - - :param prevalences: the prevalence vector to check - :return: True if `p` is valid, False otherwise - """ - prevalences = np.asarray(prevalences) - if not all(prevalences >= 0): - if raise_exception: - raise ValueError('the prevalence vector contains negative numbers') - return False - if not all(prevalences <= 1): - if raise_exception: - raise ValueError('the prevalence vector contains values >1') - return False - if not np.isclose(prevalences.sum(), 1, atol=toleranze): - if raise_exception: - raise ValueError('the prevalence vector does not sum up to 1') - return False - return True def get_divergence(divergence: Union[str, Callable]): @@ -334,6 +376,10 @@ def get_divergence(divergence: Union[str, Callable]): raise ValueError(f'argument "divergence" not understood; use a str or a callable function') +# ------------------------------------------------------------------------------------------ +# Solvers +# ------------------------------------------------------------------------------------------ + def argmin_prevalence(loss: Callable, n_classes: int, method: Literal["optim_minimize", "linear_search", "ternary_search"]='optim_minimize'): @@ -353,7 +399,7 @@ def argmin_prevalence(loss: Callable, elif method == 'linear_search': return linear_search(loss, n_classes) elif method == 'ternary_search': - raise NotImplementedError() + return ternary_search(loss, n_classes) else: raise NotImplementedError() @@ -401,94 +447,135 @@ def linear_search(loss: Callable, n_classes: int): return np.asarray([1 - prev_selected, prev_selected]) -def map_onto_probability_simplex(unnormalized_arr: ArrayLike) -> np.ndarray: - """Projects a point onto the probability simplex. +def ternary_search(loss: Callable, n_classes: int): + raise NotImplementedError() - The code is adapted from Mathieu Blondel's BSD-licensed - `implementation `_ - which is accompanying the paper - Mathieu Blondel, Akinori Fujino, and Naonori Ueda. - Large-scale Multiclass Support Vector Machine Training via Euclidean Projection onto the Simplex, - ICPR 2014, `URL `_ +# ------------------------------------------------------------------------------------------ +# Sampling utils +# ------------------------------------------------------------------------------------------ - :param unnormalized_arr: point in n-dimensional space, shape `(n,)` - :return: projection of `v` onto (n-1)-dimensional probability simplex, shape `(n,)` +def prevalence_linspace(grid_points:int=21, repeats:int=1, smooth_limits_epsilon:float=0.01) -> np.ndarray: """ - unnormalized_arr = np.asarray(unnormalized_arr) - n = len(unnormalized_arr) + Produces an array of uniformly separated values of prevalence.
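Editor's note: the solver dispatcher above can be exercised with a toy loss. A hedged sketch follows; `optim_minimize` and `linear_search` are the two implemented strategies (the latter is binary-only), while `ternary_search` remains a stub.

```python
import numpy as np
import quapy.functional as F

target = np.asarray([0.3, 0.7])
loss = lambda prev: np.linalg.norm(prev - target)

print(F.argmin_prevalence(loss, n_classes=2, method='optim_minimize'))  # ~[0.3, 0.7]
print(F.argmin_prevalence(loss, n_classes=2, method='linear_search'))   # grid search over [0, 1]
```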
+ By default, produces an array of 21 prevalence values, with + step 0.05 and with the limits smoothed, i.e.: + [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99] - # Sort the values in the descending order - u = np.sort(unnormalized_arr)[::-1] - - cssv = np.cumsum(u) - 1.0 - ind = np.arange(1, n + 1) - cond = u - cssv / ind > 0 - rho = ind[cond][-1] - theta = cssv[cond][-1] / float(rho) - return np.maximum(unnormalized_arr - theta, 0) - - -def clip_prevalence(prevalences: ArrayLike, method: Literal[None, "none", "clip", "project"]) -> np.ndarray: + :param grid_points: the number of prevalence values to sample from the [0,1] interval (default 21) + :param repeats: number of times each prevalence is to be repeated (defaults to 1) + :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 + :return: an array of uniformly separated prevalence values """ - Clips the proportions vector `prevalences` so that it is a valid probability distribution, i.e., all values - are in [0,1] and sum up to 1. + p = np.linspace(0., 1., num=grid_points, endpoint=True) + p[0] += smooth_limits_epsilon + p[-1] -= smooth_limits_epsilon + if p[0] > p[1]: + raise ValueError(f'the smoothing in the limits is greater than the prevalence step') + if repeats > 1: + p = np.repeat(p, repeats) + return p - :param prevalences: array-like, the proportions vector to be clipped, shape `(n_classes,)` - :param method: indicates the method to be used for normalization. - If `None` or `"none"`, no normalization is performed. - If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. - If `"project"`, the values are projected onto the probability simplex. - :return: the normalized prevalence vector, shape `(n_classes,)` + +def uniform_prevalence_sampling(n_classes: int, size: int=1) -> np.ndarray: """ - prevalences = np.asarray(prevalences) - if method in [None, "none"]: - return prevalences - elif method == "clip": - clipped = np.clip(prevalences, 0, 1) - adjusted = clipped / clipped.sum() - return adjusted - elif method == "project": - return map_onto_probability_simplex(prevalences) + Implements the `Kraemer algorithm `_ + for sampling uniformly at random from the unit simplex. This implementation is adapted from this + `post _`. + + :param n_classes: integer, number of classes (dimensionality of the simplex) + :param size: number of samples to return + :return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise + """ + if n_classes == 2: + u = np.random.rand(size) + u = np.vstack([1-u, u]).T else: - raise ValueError(f'Unknown method {method}. Valid ones are "none", "clip", or "project"') + u = np.random.rand(size, n_classes-1) + u.sort(axis=-1) + _0s = np.zeros(shape=(size, 1)) + _1s = np.ones(shape=(size, 1)) + a = np.hstack([_0s, u]) + b = np.hstack([u, _1s]) + u = b-a + if size == 1: + u = u.flatten() + return u + + +uniform_simplex_sampling = uniform_prevalence_sampling + + +# ------------------------------------------------------------------------------------------ +# Adjustment +# ------------------------------------------------------------------------------------------ + +def solve_adjustment_binary(prevalence_estim: ArrayLike, tpr: float, fpr: float, clip: bool=True): + """ + Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the + positive class `p` comes down to computing: + + .. 
math:: + ACC(p) = \\frac{ p - fpr }{ tpr - fpr } + + :param float prevalence_estim: the estimated value for the positive class (`p` in the formula) + :param float tpr: the true positive rate of the classifier + :param float fpr: the false positive rate of the classifier + :param bool clip: set to True (default) to clip values that might exceed the range [0,1] + :return: float, the adjusted count + """ + + den = tpr - fpr + if den == 0: + den += 1e-8 + adjusted = (prevalence_estim - fpr) / den + if clip: + adjusted = np.clip(adjusted, 0., 1.) + return adjusted def solve_adjustment( - p_c_cond_y: np.ndarray, - p_c: np.ndarray, + class_conditional_rates: np.ndarray, + unadjusted_counts: np.ndarray, method: Literal["inversion", "invariant-ratio"], solver: Literal["exact", "minimize", "exact-raise", "exact-cc"]) -> np.ndarray: """ - Function that tries to solve for the equation :math:`P(C)=P(C|Y)P(Y)`, where :math:`P(C)` is the vector of - prevalence values obtained by a classify and count, and :math:`P(C|Y)` are the class-conditional misclassification - rates of the classifier. + Function that tries to solve for :math:`p` the equation :math:`q = M p`, where :math:`q` is the vector of + `unadjusted counts` (as estimated, e.g., via classify and count) with :math:`q_i` an estimate of + :math:`P(\hat{Y}=y_i)`, and where :math:`M` is the matrix of `class-conditional rates` with :math:`M_{ij}` an + estimate of :math:`P(\hat{Y}=y_i|Y=y_j)`. - :param p_c_cond_y: array of shape `(n_classes, n_classes,)` with entry `(c,y)` being the estimate - of :math:`P(C=c|Y=y)`, that is, the probability that an instance that belongs to class :math:`y` - ends up being classified as belonging to class :math:`c` + :param class_conditional_rates: array of shape `(n_classes, n_classes,)` with entry `(i,j)` being the estimate + of :math:`P(\hat{Y}=y_i|Y=y_j)`, that is, the probability that an instance that belongs to class :math:`y_j` + ends up being classified as belonging to class :math:`y_i` - :param p_c: array of shape `(n_classes,)` containing the prevalence values as estimated by classify and count + :param unadjusted_counts: array of shape `(n_classes,)` containing the unadjusted prevalence values (e.g., as + estimated by CC or PCC) :param str method: indicates the adjustment method to be used. Valid options are: - * 'inversion': tries to solve the equation :math:`P(C)=P(C|Y)P(Y)` as :math:`P(Y) = P(C|Y)^{-1} P(C)` where :math:`P(C|Y)^{-1}` is the matrix inversion of :math:`P(C|Y)`. This inversion may not exist in degenerated cases - * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, which replaces the last equation with the normalization condition. + * `inversion`: tries to solve the equation :math:`q = M p` as :math:`p = M^{-1} q` where + :math:`M^{-1}` is the matrix inversion of :math:`M`. This inversion may not exist in + degenerated cases. + * `invariant-ratio`: invariant ratio estimator of `Vaz et al. 2018 `_, + which replaces the last equation in :math:`M` with the normalization condition (i.e., that the sum of + all prevalence values must equal 1). :param str solver: the method to use for solving the system of linear equations. Valid options are: - * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`. - * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method. 
See :class:`quapy.method.aggregative.CC`) - * 'exact': deprecated, defaults to 'exact-cc' - * 'minimize': minimizes a loss, so the solution always exists + * `exact-raise`: tries to solve the system using matrix inversion. Raises an error if the matrix has rank + strictly lower than `n_classes`. + * `exact-cc`: if the matrix is not full rank, returns :math:`q` (i.e., the unadjusted counts) as the estimates + * `exact`: deprecated, defaults to 'exact-cc' (will be removed in future versions) + * `minimize`: minimizes a loss, so the solution always exists """ if solver == "exact": warnings.warn( "The 'exact' solver is deprecated. Use 'exact-raise' or 'exact-cc'", DeprecationWarning, stacklevel=2) solver = "exact-cc" - A = np.asarray(p_c_cond_y, dtype=float) - B = np.asarray(p_c, dtype=float) + A = np.asarray(class_conditional_rates, dtype=float) + B = np.asarray(unadjusted_counts, dtype=float) if method == "inversion": pass # We leave A and B unchanged @@ -497,13 +584,13 @@ def solve_adjustment( A[-1, :] = 1.0 B[-1] = 1.0 else: - raise ValueError(f"Method {method} not known.") + raise ValueError(f"unknown {method=}") if solver == "minimize": def loss(prev): return np.linalg.norm(A @ prev - B) return optim_minimize(loss, n_classes=A.shape[0]) - else: + elif solver in ["exact-raise", "exact-cc"]: # Solvers based on matrix inversion, so we use try/except block try: return np.linalg.solve(A, B) @@ -514,6 +601,8 @@ def solve_adjustment( if solver == "exact-raise": raise elif solver == "exact-cc": - return p_c + return unadjusted_counts else: raise ValueError(f"Solver {solver} not known.") + else: + raise ValueError(f'unknown {solver=}') diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index bba0c15..46e56d7 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -394,21 +394,30 @@ class ACC(AggregativeCrispQuantifier): :param str method: adjustment method to be used: - * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, which tries to invert :math:`P(C|Y)` matrix. - * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, which replaces the last equation with the normalization condition. + * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, + which tries to invert :math:`P(C|Y)` matrix. + * 'invariant-ratio': invariant ratio estimator of `Vaz et al. 2018 `_, + which replaces the last equation with the normalization condition. :param str solver: indicates the method to use for solving the system of linear equations. Valid options are: - * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`. - * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank + strictly less than `n_classes`. + * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to + no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) * 'exact': deprecated, defaults to 'exact-cc' - * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter. More details about this can be consulted in `Bunse, M. 
"On Multi-Class Extensions of Adjusted Classify and Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. + * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the + default parameter. More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of + Adjusted Classify and Count", on proceedings of the 2nd International Workshop on Learning to Quantify: + Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) + `_. - :param str clipping: the method to use for normalization. + :param str norm: the method to use for normalization. - * If `None` or `"none"`, no normalization is performed. - * If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. - * If `"project"`, the values are projected onto the probability simplex. + * `clip`, the values are clipped to the range [0,1] and then L1-normalized. + * `mapsimplex` projects vectors onto the probability simplex. This implementation relies on + `Mathieu Blondel's projection_simplex_sort `_ + * `condsoftmax`, applies a softmax normalization only to prevalence vectors that lie outside the simplex :param n_jobs: number of parallel workers """ @@ -418,26 +427,25 @@ class ACC(AggregativeCrispQuantifier): val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', - clipping: Literal['clip', 'none', 'project'] = 'clip', + norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', n_jobs=None, - ): + ): self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) self.solver = solver self.method = method - self.clipping = clipping + self.norm = norm SOLVERS = ['exact', 'minimize', 'exact-raise', 'exact-cc'] METHODS = ['inversion', 'invariant-ratio'] - CLIPPING = ['clip', 'none', 'project', None] - + NORMALIZATIONS = ['clip', 'mapsimplex', 'condsoftmax', None] @classmethod def newInvariantRatioEstimation(cls, classifier: BaseEstimator, val_split=5, n_jobs=None): """ Constructs a quantifier that implements the Invariant Ratio Estimator of - `Vaz et al. 2018 _`. This amounts + `Vaz et al. 2018 `_. This amounts to setting method to 'invariant-ratio' and clipping to 'project'. 
:param classifier: a sklearn's Estimator that generates a classifier @@ -451,15 +459,15 @@ class ACC(AggregativeCrispQuantifier): :param n_jobs: number of parallel workers :return: an instance of ACC configured so that it implements the Invariant Ratio Estimator """ - return ACC(classifier, val_split=val_split, method='invariant-ratio', clipping='project', n_jobs=n_jobs) + return ACC(classifier, val_split=val_split, method='invariant-ratio', norm='mapsimplex', n_jobs=n_jobs) def _check_init_parameters(self): if self.solver not in ACC.SOLVERS: raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}") if self.method not in ACC.METHODS: raise ValueError(f"unknown method; valid ones are {ACC.METHODS}") - if self.clipping not in ACC.CLIPPING: - raise ValueError(f"unknown clipping; valid ones are {ACC.CLIPPING}") + if self.norm not in ACC.NORMALIZATIONS: + raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}") def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ @@ -497,12 +505,12 @@ class ACC(AggregativeCrispQuantifier): def aggregate(self, classif_predictions): prevs_estim = self.cc.aggregate(classif_predictions) estimate = F.solve_adjustment( - p_c_cond_y=self.Pte_cond_estim_, - p_c=prevs_estim, + class_conditional_rates=self.Pte_cond_estim_, + unadjusted_counts=prevs_estim, solver=self.solver, method=self.method, ) - return F.clip_prevalence(estimate, method=self.clipping) + return F.normalize_prevalence(estimate, method=self.norm) class PACC(AggregativeSoftQuantifier): @@ -521,21 +529,30 @@ class PACC(AggregativeSoftQuantifier): :param str method: adjustment method to be used: - * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, which tries to invert `P(C|Y)` matrix. - * 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, which replaces the last equation with the normalization condition. + * 'inversion': matrix inversion method based on the matrix equality :math:`P(C)=P(C|Y)P(Y)`, + which tries to invert `P(C|Y)` matrix. + * 'invariant-ratio': invariant ratio estimator of `Vaz et al. `_, + which replaces the last equation with the normalization condition. :param str solver: the method to use for solving the system of linear equations. Valid options are: - * 'exact-raise': tries to solve the system using matrix inversion. Raises an error if the matrix has rank strictly less than `n_classes`. - * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which corresponds to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + * 'exact-raise': tries to solve the system using matrix inversion. + Raises an error if the matrix has rank strictly less than `n_classes`. + * 'exact-cc': if the matrix is not of full rank, returns `p_c` as the estimates, which + corresponds to no adjustment (i.e., the classify and count method. See :class:`quapy.method.aggregative.CC`) + * 'exact': deprecated, defaults to 'exact-cc' - * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the default parameter. More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions of Adjusted Classify and Count", on proceedings of the 2nd International Workshop on Learning to Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) `_. + * 'minimize': minimizes the L2 norm of :math:`|Ax-B|`. This one generally works better, and is the + default parameter.
More details about this can be consulted in `Bunse, M. "On Multi-Class Extensions + of Adjusted Classify and Count", on proceedings of the 2nd International Workshop on Learning to + Quantify: Methods and Applications (LQ 2022), ECML/PKDD 2022, Grenoble (France) + `_. - :param str clipping: the method to use for normalization. + :param str norm: the method to use for normalization. - * If `None` or `"none"`, no normalization is performed. - * If `"clip"`, the values are clipped to the range [0,1] and normalized, so they sum up to 1. - * If `"project"`, the values are projected onto the probability simplex. + * `clip`, the values are clipped to the range [0,1] and then L1-normalized. + * `mapsimplex` projects vectors onto the probability simplex. This implementation relies on + `Mathieu Blondel's projection_simplex_sort `_ + * `condsoftmax`, applies a softmax normalization only to prevalence vectors that lie outside the simplex :param n_jobs: number of parallel workers """ @@ -545,24 +562,24 @@ class PACC(AggregativeSoftQuantifier): val_split=5, solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize', method: Literal['inversion', 'invariant-ratio'] = 'inversion', - clipping: Literal['clip', 'none', 'project'] = 'clip', - n_jobs=None, - ): + norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip', + n_jobs=None + ): self.classifier = classifier self.val_split = val_split self.n_jobs = qp._get_njobs(n_jobs) self.solver = solver self.method = method - self.clipping = clipping + self.norm = norm def _check_init_parameters(self): if self.solver not in ACC.SOLVERS: raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}") if self.method not in ACC.METHODS: raise ValueError(f"unknown method; valid ones are {ACC.METHODS}") - if self.clipping not in ACC.CLIPPING: - raise ValueError(f"unknown clipping; valid ones are {ACC.CLIPPING}") + if self.norm not in ACC.NORMALIZATIONS: + raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}") def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection): """ @@ -580,12 +596,12 @@ class PACC(AggregativeSoftQuantifier): prevs_estim = self.pcc.aggregate(classif_posteriors) estimate = F.solve_adjustment( - p_c_cond_y=self.Pte_cond_estim_, - p_c=prevs_estim, + class_conditional_rates=self.Pte_cond_estim_, + unadjusted_counts=prevs_estim, solver=self.solver, method=self.method, ) - return F.clip_prevalence(estimate, method=self.clipping) + return F.normalize_prevalence(estimate, method=self.norm) @classmethod def getPteCondEstim(cls, classes, y, y_): diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 12b3386..107f303 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -25,6 +25,7 @@ class Status(Enum): class ConfigStatus: + def __init__(self, params, status, msg=''): self.params = params self.status = status
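Editor's note: to close, an end-to-end sketch of the renamed `norm` argument in ACC (formerly `clipping`, with 'project' now spelled 'mapsimplex'). The synthetic data and the choice of classifier are invented for illustration only; the fit/quantify calls assume this patched branch is installed.

```python
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from quapy.data import LabelledCollection
from quapy.method.aggregative import ACC

# synthetic 3-class problem (sizes and seed are arbitrary)
X, y = make_classification(n_samples=1000, n_classes=3, n_informative=5, random_state=0)
training = LabelledCollection(X[:700], y[:700])

# 'norm' replaces the former 'clipping' argument
acc = ACC(LogisticRegression(max_iter=1000), norm='mapsimplex')
acc.fit(training)
print(acc.quantify(X[700:]))  # adjusted estimate, guaranteed to lie on the simplex
```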