From ba18d003340aa9e6ee46f8395b35c0d2f02a50af Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Mon, 20 Dec 2021 11:39:44 +0100
Subject: [PATCH 01/59] trying to figure out how to refactor protocols
 meaningfully

---
 quapy/__init__.py        |  2 +-
 quapy/data/base.py       | 85 ++++++----------------------------------
 quapy/functional.py      | 30 --------------
 quapy/model_selection.py |  2 +-
 4 files changed, 15 insertions(+), 104 deletions(-)

diff --git a/quapy/__init__.py b/quapy/__init__.py
index a1ccee4..ad69ae9 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -10,7 +10,7 @@ from . import model_selection
 from . import classification
 from quapy.method.base import isprobabilistic, isaggregative

-__version__ = '0.1.6'
+__version__ = '0.1.7'

 environ = {
     'SAMPLE_SIZE': None,
diff --git a/quapy/data/base.py b/quapy/data/base.py
index a59d8d2..cfe2891 100644
--- a/quapy/data/base.py
+++ b/quapy/data/base.py
@@ -3,7 +3,7 @@ from scipy.sparse import issparse
 from scipy.sparse import vstack
 from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold

-from quapy.functional import artificial_prevalence_sampling, strprev
+from quapy.functional import strprev


 class LabelledCollection:
@@ -120,21 +120,24 @@ class LabelledCollection:
         assert len(prevs) == self.n_classes, 'unexpected number of prevalences'
         assert sum(prevs) == 1, f'prevalences ({prevs}) wrong range (sum={sum(prevs)})'

-        taken = 0
-        indexes_sample = []
-        for i, class_ in enumerate(self.classes_):
-            if i == self.n_classes - 1:
-                n_requested = size - taken
-            else:
-                n_requested = int(size * prevs[i])
+        # Decide how many instances should be taken for each class in order to satisfy the requested prevalence
+        # accurately, and the number of instances in the sample (exactly). If int(size * prevs[i]) (which is
+        # <= size * prevs[i]) examples are drawn from class i, there could be a remainder number of instances to take
+        # to satisfy the size constraint. The remainder is distributed along the classes with probability = prevs.
+        # (This avoids placing the remainder in a class for which the requested prevalence is 0.)
+        n_requests = {class_: int(size * prevs[i]) for i, class_ in enumerate(self.classes_)}
+        remainder = size - sum(n_requests.values())
+        for rand_class in np.random.choice(self.classes_, size=remainder, p=prevs):
+            n_requests[rand_class] += 1

+        indexes_sample = []
+        for class_, n_requested in n_requests.items():
             n_candidates = len(self.index[class_])
             index_sample = self.index[class_][
                 np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
             ] if n_requested > 0 else []

             indexes_sample.append(index_sample)
-            taken += n_requested

         indexes_sample = np.concatenate(indexes_sample).astype(int)

@@ -152,7 +155,7 @@ class LabelledCollection:
         :param size: integer, the size of the uniform sample
         :return: a np.ndarray of shape `(size)` with the indexes
         """
-        return np.random.choice(len(self), size, replace=False)
+        return np.random.choice(len(self), size, replace=size > len(self))

     def sampling(self, size, *prevs, shuffle=True):
         """
@@ -212,68 +215,6 @@ class LabelledCollection:
                                                            random_state=random_state)
         return LabelledCollection(tr_docs, tr_labels), LabelledCollection(te_docs, te_labels)

-    def artificial_sampling_generator(self, sample_size, n_prevalences=101, repeats=1):
-        """
-        A generator of samples that implements the artificial prevalence protocol (APP).
- The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. - - :param sample_size: the number of instances in each sample - :param n_prevalences: the number of prevalence points to be taken from the [0,1] interval (including the - limits {0,1}). E.g., if `n_prevalences=11`, then the prevalence points to take are [0, 0.1, 0.2, ..., 1] - :param repeats: the number of samples to generate for each valid combination of prevalence values (default 1) - :return: yield samples generated at artificially controlled prevalence values - """ - dimensions = self.n_classes - for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): - yield self.sampling(sample_size, *prevs) - - def artificial_sampling_index_generator(self, sample_size, n_prevalences=101, repeats=1): - """ - A generator of sample indexes implementing the artificial prevalence protocol (APP). - The APP consists of exploring - a grid of prevalence values (e.g., [0, 0.05, 0.1, 0.15, ..., 1]), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of sample indexes for each valid - combination of prevalence values is indicated by `repeats` - - :param sample_size: the number of instances in each sample (i.e., length of each index) - :param n_prevalences: the number of prevalence points to be taken from the [0,1] interval (including the - limits {0,1}). E.g., if `n_prevalences=11`, then the prevalence points to take are [0, 0.1, 0.2, ..., 1] - :param repeats: the number of samples to generate for each valid combination of prevalence values (default 1) - :return: yield the indexes that generate the samples according to APP - """ - dimensions = self.n_classes - for prevs in artificial_prevalence_sampling(dimensions, n_prevalences, repeats): - yield self.sampling_index(sample_size, *prevs) - - def natural_sampling_generator(self, sample_size, repeats=100): - """ - A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing - samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. - - :param sample_size: integer, the number of instances in each sample - :param repeats: the number of samples to generate - :return: yield instances of :class:`LabelledCollection` - """ - for _ in range(repeats): - yield self.uniform_sampling(sample_size) - - def natural_sampling_index_generator(self, sample_size, repeats=100): - """ - A generator of sample indexes according to the natural prevalence protocol (NPP). The NPP consists of drawing - samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. 
- - :param sample_size: integer, the number of instances in each sample (i.e., the length of each index) - :param repeats: the number of indexes to generate - :return: yield `repeats` instances of np.ndarray with shape `(sample_size,)` - """ - for _ in range(repeats): - yield self.uniform_sampling_index(sample_size) - def __add__(self, other): """ Returns a new :class:`LabelledCollection` as the union of this collection with another collection diff --git a/quapy/functional.py b/quapy/functional.py index a8b17f6..e42d743 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -4,36 +4,6 @@ import scipy import numpy as np -def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False): - """ - Generates vectors of prevalence values artificially drawn from an exhaustive grid of prevalence values. The - number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, - `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only - valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each - valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be - implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained - to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to 1). - - :param dimensions: the number of classes - :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid - (default is 21) - :param repeat: number of copies for each valid prevalence vector (default is 1) - :param return_constrained_dim: set to True to return all dimensions, or to False (default) for ommitting the - constrained dimension - :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)` - if `return_constrained_dim=False`, where `n` is the number of valid combinations found in the grid multiplied - by `repeat` - """ - s = np.linspace(0., 1., n_prevalences, endpoint=True) - s = [s] * (dimensions - 1) - prevs = [p for p in itertools.product(*s, repeat=1) if sum(p)<=1] - if return_constrained_dim: - prevs = [p+(1-sum(p),) for p in prevs] - prevs = np.asarray(prevs).reshape(len(prevs), -1) - if repeat>1: - prevs = np.repeat(prevs, repeat, axis=0) - return prevs - def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01): """ diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 5af4b2f..eef811b 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -21,7 +21,7 @@ class GridSearchQ(BaseQuantifier): :param model: the quantifier to optimize :type model: BaseQuantifier :param param_grid: a dictionary with keys the parameter names and values the list of values to explore - :param sample_size: the size of the samples to extract from the validation set (ignored if protocl='gen') + :param sample_size: the size of the samples to extract from the validation set (ignored if protocol='gen') :param protocol: either 'app' for the artificial prevalence protocol, 'npp' for the natural prevalence protocol, or 'gen' for using a custom sampling generator function :param n_prevpoints: if specified, indicates the number of equally distant points to extract from the interval From be7a126c9492969a41e925507e4d67a7bcda7552 Mon Sep 17 00:00:00 2001 
From: Alejandro Moreo
Date: Thu, 7 Apr 2022 16:48:31 +0200
Subject: [PATCH 02/59] update todo things

---
 TODO.txt | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/TODO.txt b/TODO.txt
index 8a674a7..c20e901 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,7 +1,11 @@
+sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
+clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.)
+make GridSearchQ truly parallel
+abstract protocols
+
 Packaging:
 ==========================================
-Documentation with sphinx
 Document methods with paper references
 unit-tests
 clean wiki_examples!

From b453c8fcbc89a9c69718c0f82916b4292e573fd1 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Fri, 20 May 2022 16:48:46 +0200
Subject: [PATCH 03/59] first commit protocols

---
 quapy/functional.py  |  21 ++++
 quapy/newprotocol.py | 244 +++++++++++++++++++++++++++++++++++++++++++
 quapy/protocol.py    | 179 +++++++++++++++++++++++++++
 3 files changed, 444 insertions(+)
 create mode 100644 quapy/newprotocol.py
 create mode 100644 quapy/protocol.py

diff --git a/quapy/functional.py b/quapy/functional.py
index e42d743..215d89f 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -239,3 +239,24 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe
     else:
         n_prevpoints += 1

+
+def check_prevalence_vector(p, raise_exception=False, tolerance=1e-08):
+    """
+    Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.
+    :param p: the prevalence vector to check
+    :return: True if `p` is valid, False otherwise
+    """
+    p = np.asarray(p)
+    if not all(p>=0):
+        if raise_exception:
+            raise ValueError('the prevalence vector contains negative numbers')
+        return False
+    if not all(p<=1):
+        if raise_exception:
+            raise ValueError('the prevalence vector contains values >1')
+        return False
+    if not np.isclose(p.sum(), 1, atol=tolerance):
+        if raise_exception:
+            raise ValueError('the prevalence vector does not sum up to 1')
+        return False
+    return True
diff --git a/quapy/newprotocol.py b/quapy/newprotocol.py
new file mode 100644
index 0000000..799f79b
--- /dev/null
+++ b/quapy/newprotocol.py
@@ -0,0 +1,244 @@
+import itertools
+from collections.abc import Generator
+from contextlib import ExitStack
+from abc import ABCMeta, abstractmethod
+
+from quapy.data import LabelledCollection
+import quapy.functional as F
+
+
+# 0.1.7
+# change the LabelledCollection API (removing protocol-related samplings)
+# need to change the two references to the above in the wiki / doc, and code examples...
+# removed artificial_prevalence_sampling from functional
+
+
+# class AbstractProtocol(metaclass=ABCMeta):
+#     def __call__(self):
+#         for g in self.gen():
+#             yield g
+#
+#     @abstractmethod
+#     def gen(self):
+#         ...
+
+
+class AbstractStochasticProtocol(metaclass=ABCMeta):
+    def __init__(self, seed=None):
+        self.random_seed = seed
+
+    @property
+    def random_seed(self):
+        return self._random_seed
+
+    @random_seed.setter
+    def random_seed(self, seed):
+        self._random_seed = seed
+
+    @abstractmethod
+    def samples_parameters(self):
+        """
+        This function has to return all the necessary parameters to replicate the samples
+        :return: a list of parameters, each of which serves to deterministically generate a sample
+        """
+        ...
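+
+    # A minimal concrete subclass might implement the two abstract methods along the
+    # lines of the following sketch (illustrative only; it assumes the subclass keeps
+    # a LabelledCollection in `self.data` and that numpy is available as `np`):
+    #
+    #     def samples_parameters(self):
+    #         # pre-draw one index array per sample, e.g., 5 samples of size 10
+    #         return [np.random.choice(len(self.data), 10, replace=False) for _ in range(5)]
+    #
+    #     def sample(self, params):
+    #         # deterministic: the same index array always yields the same sample
+    #         return self.data.sampling_from_index(params)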
+ + @abstractmethod + def sample(self, params): + """ + Extract one sample determined by the given parameters + + :param params: all the necessary parameters to generate a sample + :return: one sample (the same sample has to be generated for the same parameters) + """ + ... + + def __call__(self): + with ExitStack() as stack: + if self.random_seed is not None: + stack.enter_context(qp.util.temp_seed(self.random_seed)) + for params in self.samples_parameters(): + yield self.sample(params) + + +class APP(AbstractStochasticProtocol): + """ + Implementation of the artificial prevalence protocol (APP). + The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., + [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of + prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., + [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid + combination of prevalence values is indicated by `repeats`. + + :param sample_size: integer, number of instances in each sample + :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the + grid (default is 21) + :param repeats: number of copies for each valid prevalence vector (default is 1) + :param random_seed: allows replicating samples across runs (default None) + """ + + def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=1, random_seed=None): + super(APP, self).__init__(random_seed) + self.data = data + self.sample_size = sample_size + self.n_prevalences = n_prevalences + self.repeats = repeats + + def prevalence_grid(self, dimensions): + """ + Generates vectors of prevalence values from an exhaustive grid of prevalence values. The + number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, + `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only + valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each + valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be + implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained + to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to + 1). Note that this method is deterministic, i.e., there is no random sampling anywhere. 
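+
+        As a worked example, with `n_prevalences=3` and `dimensions=3` the grid for each
+        of the two free dimensions is [0, 0.5, 1], and the six valid (implicit) vectors
+        returned would be:
+
+            [[0. , 0. ],
+             [0. , 0.5],
+             [0. , 1. ],
+             [0.5, 0. ],
+             [0.5, 0.5],
+             [1. , 0. ]]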
+ + :param dimensions: the number of classes + :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape + `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found + in the grid multiplied by `repeat` + """ + s = np.linspace(0., 1., self.n_prevalences, endpoint=True) + s = [s] * (dimensions - 1) + prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1] + prevs = np.asarray(prevs).reshape(len(prevs), -1) + if self.repeats > 1: + prevs = np.repeat(prevs, self.repeats, axis=0) + return prevs + + def samples_parameters(self): + indexes = [] + for prevs in self.prevalence_grid(dimensions=self.data.n_classes): + index = data.sampling_index(self.sample_size, *prevs) + indexes.append(index) + return indexes + + def sample(self, index): + return self.data.sampling_from_index(index) + + +class NPP(AbstractStochasticProtocol): + """ + A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing + samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. + + :param sample_size: integer, the number of instances in each sample + :param repeats: the number of samples to generate + """ + + def __init__(self, data:LabelledCollection, sample_size, repeats=1, random_seed=None): + super(NPP, self).__init__(random_seed) + self.data = data + self.sample_size = sample_size + self.repeats = repeats + self.random_seed = random_seed + + def samples_parameters(self): + indexes = [] + for _ in range(self.repeats): + index = data.uniform_sampling_index(self.sample_size) + indexes.append(index) + return indexes + + def sample(self, index): + return self.data.sampling_from_index(index) + + +class USimplexPP(AbstractStochasticProtocol): + + def __init__(self, data: LabelledCollection, sample_size, repeats=1, random_seed=None): + super(USimplexPP, self).__init__(random_seed) + self.data = data + self.sample_size = sample_size + self.repeats = repeats + self.random_seed = random_seed + + def samples_parameters(self): + indexes = [] + for prevs in F.uniform_simplex_sampling(n_classes=data.n_classes, size=self.repeats): + index = data.sampling_index(self.sample_size, *prevs) + indexes.append(index) + return indexes + + def sample(self, index): + return self.data.sampling_from_index(index) + + +class CovariateShift(AbstractStochasticProtocol): + """ + Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. + + :param domainA: + :param domainB: + :param sample_size: + :param repeats: + :param prevalence: the prevalence to preserv along the mixtures. If specified, should be an array containing + one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence + will be taken from the domain A (default). + :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will + generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself. 
+ the specific points + :param random_seed: + """ + + def __init__( + self, + domainA: LabelledCollection, + domainB: LabelledCollection, + sample_size, + repeats=1, + prevalence=None, + mixture_points=11, + random_seed=None): + super(CovariateShift, self).__init__(random_seed) + self.data = data + self.sample_size = sample_size + self.repeats = repeats + if prevalence is None: + self.prevalence = domainA.prevalence() + else: + self.prevalence = np.asarray(prevalence) + assert len(self.prevalence) == domainA.n_classes, \ + f'wrong shape for the vector prevalence (expected {domainA.n_classes})' + assert F.check_prevalence_vector(self.prevalence), \ + f'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)' + assert isinstance(mixture_points, int) or + self.random_seed = random_seed + + def samples_parameters(self): + indexes = [] + for _ in range(self.repeats): + index = data.uniform_sampling_index(self.sample_size) + indexes.append(index) + return indexes + + def sample(self, index): + return self.data.sampling_from_index(index) + + +if __name__=='__main__': + import numpy as np + import quapy as qp + + y = [0]*25 + [1]*25 + [2]*25 + [3]*25 + X = [str(i)+'-'+str(yi) for i, yi in enumerate(y)] + + data = LabelledCollection(X, y, classes_=sorted(np.unique(y))) + + # p=CounterExample(1, 8, 10, 5) + + # p = APP(data, sample_size=10, n_prevalences=11, random_seed=42) + # p = NPP(data, sample_size=10, repeats=10, random_seed=42) + # p = NPP(data, sample_size=10, repeats=10) + p = USimplexPP(data, sample_size=10, repeats=10) + + for _ in range(2): + print('init generator', p.__class__.__name__) + for i in p(): + # print(i) + print(i.instances, i.labels, i.prevalence()) + + print('done') + diff --git a/quapy/protocol.py b/quapy/protocol.py new file mode 100644 index 0000000..99f2522 --- /dev/null +++ b/quapy/protocol.py @@ -0,0 +1,179 @@ +import itertools +from collections.abc import Generator +from contextlib import ExitStack +from abc import ABCMeta, abstractmethod + +from quapy.data import LabelledCollection +import quapy.functional as F + + +# 0.1.7 +# change the LabelledCollection API (removing protocol-related samplings) +# need to change the two references to the above in the wiki / doc, and code examples... +# removed artificial_prevalence_sampling from functional + + +class NewAbstractProtocol(metaclass=Generator): + @abstractmethod + def send(self, value): + """Send a value into the generator. + Return next yielded value or raise StopIteration. + """ + raise StopIteration + + @abstractmethod + def throw(self, typ, val=None, tb=None): + """Raise an exception in the generator. + Return next yielded value or raise StopIteration. + """ + if val is None: + if tb is None: + raise typ + val = typ() + if tb is not None: + val = val.with_traceback(tb) + raise val + + + +class AbstractProtocol(metaclass=ABCMeta): + """ + Abstract class for sampling protocols. + A sampling protocol defines how to generate samples out of some dataset. + """ + + def __call__(self): + """ + A generator that yields one sample at each iteration + + :return: yield one sample (instance of :class:`quapy.data.LabelledCollection`) at each iteration + """ + for index in self.indexes(data): + yield data.sampling_from_index(index) + + def indexes(self, data: LabelledCollection): + """ + A generator that yields one sample index at each iteration. 
+ (This function is mainly a generic decorator that sets, if requested, the local random seed; the real + sampling is implemented by :meth:`_indexes`.) + + :param data: the set of data from which samples' indexes are to be drawn + :return: one sample index (instance of `np.ndarray`) at each iteration + """ + with ExitStack() as stack: + if self.random_seed is not None: + stack.enter_context(qp.util.temp_seed(self.random_seed)) + for index in self._indexes(data): + yield index + + @abstractmethod + def _indexes(self, data: LabelledCollection): + ... + + +class APP(AbstractProtocol): + """ + Implementation of the artificial prevalence protocol (APP). + The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., + [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of + prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., + [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid + combination of prevalence values is indicated by `repeats`. + + :param sample_size: integer, number of instances in each sample + :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the + grid (default is 21) + :param repeats: number of copies for each valid prevalence vector (default is 1) + :param random_seed: allows replicating samples across runs (default None) + """ + + def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=1, random_seed=None): + self.data = data + self.sample_size = sample_size + self.n_prevalences = n_prevalences + self.repeats = repeats + self.random_seed = random_seed + + def _indexes(self, data: LabelledCollection): + for prevs in self.prevalence_grid(dimensions=data.n_classes): + yield data.sampling_index(self.sample_size, *prevs) + + def prevalence_grid(self, dimensions, return_constrained_dim=False): + """ + Generates vectors of prevalence values from an exhaustive grid of prevalence values. The + number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, + `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only + valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each + valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be + implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained + to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to + 1). Note that this method is deterministic, i.e., there is no random sampling anywhere. 
+ + :param dimensions: the number of classes + :param return_constrained_dim: set to True to return all dimensions, or to False (default) for ommitting the + constrained dimension + :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape + `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found + in the grid multiplied by `repeat` + """ + s = np.linspace(0., 1., self.n_prevalences, endpoint=True) + s = [s] * (dimensions - 1) + prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1] + if return_constrained_dim: + prevs = [p + (1 - sum(p),) for p in prevs] + prevs = np.asarray(prevs).reshape(len(prevs), -1) + if self.repeats > 1: + prevs = np.repeat(prevs, self.repeats, axis=0) + return prevs + + +class NPP(AbstractProtocol): + """ + A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing + samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. + + :param sample_size: integer, the number of instances in each sample + :param repeats: the number of samples to generate + """ + + def __init__(self, sample_size, repeats=1, random_seed=None): + self.sample_size = sample_size + self.repeats = repeats + self.random_seed = random_seed + + def _indexes(self, data: LabelledCollection): + for _ in range(self.repeats): + yield data.uniform_sampling_index(self.sample_size) + + +class USimplexPP(AbstractProtocol): + + def __init__(self, sample_size, repeats=1, random_seed=None): + self.sample_size = sample_size + self.repeats = repeats + self.random_seed = random_seed + + def _indexes(self, data: LabelledCollection): + for prevs in F.uniform_simplex_sampling(n_classes=data.n_classes, size=self.repeats): + yield data.sampling_index(self.sample_size, *prevs) + + + +if __name__=='__main__': + import numpy as np + import quapy as qp + + y = [0]*25 + [1]*25 + [2]*25 + [3]*25 + X = [str(i)+'-'+str(yi) for i, yi in enumerate(y)] + + data = LabelledCollection(X, y, classes_=sorted(np.unique(y))) + + # p = APP(10, n_prevalences=11, random_seed=42) + # p = NPP(10, repeats=10, random_seed=42) + p = USimplexPP(10, repeats=10, random_seed=42) + + for i in p(data): + print(i.instances, i.classes, i.prevalence()) + + print('done') + From 46e3632200e2e7d54814bd2b1a4d91c944f32a0a Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Mon, 23 May 2022 00:20:08 +0200 Subject: [PATCH 04/59] ongoing protocols --- quapy/{evaluation.py => depr_evaluation.py} | 0 quapy/newprotocol.py | 244 ---------------- quapy/protocol.py | 293 +++++++++++++++----- 3 files changed, 223 insertions(+), 314 deletions(-) rename quapy/{evaluation.py => depr_evaluation.py} (100%) delete mode 100644 quapy/newprotocol.py diff --git a/quapy/evaluation.py b/quapy/depr_evaluation.py similarity index 100% rename from quapy/evaluation.py rename to quapy/depr_evaluation.py diff --git a/quapy/newprotocol.py b/quapy/newprotocol.py deleted file mode 100644 index 799f79b..0000000 --- a/quapy/newprotocol.py +++ /dev/null @@ -1,244 +0,0 @@ -import itertools -from collections.abc import Generator -from contextlib import ExitStack -from abc import ABCMeta, abstractmethod - -from quapy.data import LabelledCollection -import quapy.functional as F - - -# 0.1.7 -# change the LabelledCollection API (removing protocol-related samplings) -# need to change the two references to the above in the wiki / doc, and code examples... 
-# removed artificial_prevalence_sampling from functional - - -# class AbstractProtocol(metaclass=ABCMeta): -# def __call__(self): -# for g in self.gen(): -# yield g -# -# @abstractmethod -# def gen(self): -# ... - - -class AbstractStochasticProtocol(metaclass=ABCMeta): - def __init__(self, seed=None): - self.random_seed = seed - - @property - def random_seed(self): - return self._random_seed - - @random_seed.setter - def random_seed(self, seed): - self._random_seed = seed - - @abstractmethod - def samples_parameters(self): - """ - This function has to return all the necessary parameters to replicate the samples - :return: a list of parameters, each of which serves to deterministically generate a sample - """ - ... - - @abstractmethod - def sample(self, params): - """ - Extract one sample determined by the given parameters - - :param params: all the necessary parameters to generate a sample - :return: one sample (the same sample has to be generated for the same parameters) - """ - ... - - def __call__(self): - with ExitStack() as stack: - if self.random_seed is not None: - stack.enter_context(qp.util.temp_seed(self.random_seed)) - for params in self.samples_parameters(): - yield self.sample(params) - - -class APP(AbstractStochasticProtocol): - """ - Implementation of the artificial prevalence protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. - - :param sample_size: integer, number of instances in each sample - :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the - grid (default is 21) - :param repeats: number of copies for each valid prevalence vector (default is 1) - :param random_seed: allows replicating samples across runs (default None) - """ - - def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=1, random_seed=None): - super(APP, self).__init__(random_seed) - self.data = data - self.sample_size = sample_size - self.n_prevalences = n_prevalences - self.repeats = repeats - - def prevalence_grid(self, dimensions): - """ - Generates vectors of prevalence values from an exhaustive grid of prevalence values. The - number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, - `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only - valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each - valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be - implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained - to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to - 1). Note that this method is deterministic, i.e., there is no random sampling anywhere. 
- - :param dimensions: the number of classes - :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape - `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found - in the grid multiplied by `repeat` - """ - s = np.linspace(0., 1., self.n_prevalences, endpoint=True) - s = [s] * (dimensions - 1) - prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1] - prevs = np.asarray(prevs).reshape(len(prevs), -1) - if self.repeats > 1: - prevs = np.repeat(prevs, self.repeats, axis=0) - return prevs - - def samples_parameters(self): - indexes = [] - for prevs in self.prevalence_grid(dimensions=self.data.n_classes): - index = data.sampling_index(self.sample_size, *prevs) - indexes.append(index) - return indexes - - def sample(self, index): - return self.data.sampling_from_index(index) - - -class NPP(AbstractStochasticProtocol): - """ - A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing - samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. - - :param sample_size: integer, the number of instances in each sample - :param repeats: the number of samples to generate - """ - - def __init__(self, data:LabelledCollection, sample_size, repeats=1, random_seed=None): - super(NPP, self).__init__(random_seed) - self.data = data - self.sample_size = sample_size - self.repeats = repeats - self.random_seed = random_seed - - def samples_parameters(self): - indexes = [] - for _ in range(self.repeats): - index = data.uniform_sampling_index(self.sample_size) - indexes.append(index) - return indexes - - def sample(self, index): - return self.data.sampling_from_index(index) - - -class USimplexPP(AbstractStochasticProtocol): - - def __init__(self, data: LabelledCollection, sample_size, repeats=1, random_seed=None): - super(USimplexPP, self).__init__(random_seed) - self.data = data - self.sample_size = sample_size - self.repeats = repeats - self.random_seed = random_seed - - def samples_parameters(self): - indexes = [] - for prevs in F.uniform_simplex_sampling(n_classes=data.n_classes, size=self.repeats): - index = data.sampling_index(self.sample_size, *prevs) - indexes.append(index) - return indexes - - def sample(self, index): - return self.data.sampling_from_index(index) - - -class CovariateShift(AbstractStochasticProtocol): - """ - Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. - - :param domainA: - :param domainB: - :param sample_size: - :param repeats: - :param prevalence: the prevalence to preserv along the mixtures. If specified, should be an array containing - one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence - will be taken from the domain A (default). - :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will - generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself. 
- the specific points - :param random_seed: - """ - - def __init__( - self, - domainA: LabelledCollection, - domainB: LabelledCollection, - sample_size, - repeats=1, - prevalence=None, - mixture_points=11, - random_seed=None): - super(CovariateShift, self).__init__(random_seed) - self.data = data - self.sample_size = sample_size - self.repeats = repeats - if prevalence is None: - self.prevalence = domainA.prevalence() - else: - self.prevalence = np.asarray(prevalence) - assert len(self.prevalence) == domainA.n_classes, \ - f'wrong shape for the vector prevalence (expected {domainA.n_classes})' - assert F.check_prevalence_vector(self.prevalence), \ - f'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)' - assert isinstance(mixture_points, int) or - self.random_seed = random_seed - - def samples_parameters(self): - indexes = [] - for _ in range(self.repeats): - index = data.uniform_sampling_index(self.sample_size) - indexes.append(index) - return indexes - - def sample(self, index): - return self.data.sampling_from_index(index) - - -if __name__=='__main__': - import numpy as np - import quapy as qp - - y = [0]*25 + [1]*25 + [2]*25 + [3]*25 - X = [str(i)+'-'+str(yi) for i, yi in enumerate(y)] - - data = LabelledCollection(X, y, classes_=sorted(np.unique(y))) - - # p=CounterExample(1, 8, 10, 5) - - # p = APP(data, sample_size=10, n_prevalences=11, random_seed=42) - # p = NPP(data, sample_size=10, repeats=10, random_seed=42) - # p = NPP(data, sample_size=10, repeats=10) - p = USimplexPP(data, sample_size=10, repeats=10) - - for _ in range(2): - print('init generator', p.__class__.__name__) - for i in p(): - # print(i) - print(i.instances, i.labels, i.prevalence()) - - print('done') - diff --git a/quapy/protocol.py b/quapy/protocol.py index 99f2522..43bb0ef 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -1,3 +1,4 @@ +import numpy as np import itertools from collections.abc import Generator from contextlib import ExitStack @@ -5,6 +6,7 @@ from abc import ABCMeta, abstractmethod from quapy.data import LabelledCollection import quapy.functional as F +from tqdm import tqdm # 0.1.7 @@ -12,66 +14,92 @@ import quapy.functional as F # need to change the two references to the above in the wiki / doc, and code examples... # removed artificial_prevalence_sampling from functional +# maybe add some parameters in the init of the protocols (or maybe only for IndexableWhateverProtocols +# indicating that the protocol should return indexes, and not samples themselves? +# also: some parameters in the init could be used to indicate that the method should return a tuple with +# unlabelled instances and the vector of prevalence values (and not a LabelledCollection). +# Or: this can be done in a different function; i.e., we use one function (now __call__) to return +# LabelledCollections, and another new one for returning the other output, which is more general for +# evaluation purposes. -class NewAbstractProtocol(metaclass=Generator): - @abstractmethod - def send(self, value): - """Send a value into the generator. - Return next yielded value or raise StopIteration. - """ - raise StopIteration - - @abstractmethod - def throw(self, typ, val=None, tb=None): - """Raise an exception in the generator. - Return next yielded value or raise StopIteration. - """ - if val is None: - if tb is None: - raise typ - val = typ() - if tb is not None: - val = val.with_traceback(tb) - raise val - +# the so-called "gen" function has to be implemented as a protocol. 
The problem here is that this function +# should be able to return only unlabelled instances plus a vector of prevalences (and not LabelledCollections). +# This was coded as different functions in 0.1.6 class AbstractProtocol(metaclass=ABCMeta): - """ - Abstract class for sampling protocols. - A sampling protocol defines how to generate samples out of some dataset. - """ + @abstractmethod def __call__(self): """ - A generator that yields one sample at each iteration + Implements the protocol. Yields one sample at a time - :return: yield one sample (instance of :class:`quapy.data.LabelledCollection`) at each iteration + :return: yields one sample at a time """ - for index in self.indexes(data): - yield data.sampling_from_index(index) + ... - def indexes(self, data: LabelledCollection): + def total(self): """ - A generator that yields one sample index at each iteration. - (This function is mainly a generic decorator that sets, if requested, the local random seed; the real - sampling is implemented by :meth:`_indexes`.) + Indicates the total number of samples that the protocol generates. - :param data: the set of data from which samples' indexes are to be drawn - :return: one sample index (instance of `np.ndarray`) at each iteration + :return: The number of samples to generate if known, or `None` otherwise. """ + return None + + +class AbstractStochasticSeededProtocol(AbstractProtocol): + """ + An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g., + via random sapling), sequences of `LabelledCollection` samples. The protocol abstraction enforces + the object to be instantiated using a seed, so that the sequence can be completely replicated. + In order to make this functionality possible, the classes extending this abstraction need to + implement only two functions, :meth:`samples_parameters` which generates all the parameters + needed for extracting the samples, and :meth:`sample` that, given some parameters as input, + deterministically generates a sample. + + :param seed: the seed for allowing to replicate any sequence of samples. Default is None, meaning that + the sequence will be different every time the protocol is called. + """ + + def __init__(self, seed=None): + self.random_seed = seed + + @property + def random_seed(self): + return self._random_seed + + @random_seed.setter + def random_seed(self, seed): + self._random_seed = seed + + @abstractmethod + def samples_parameters(self): + """ + This function has to return all the necessary parameters to replicate the samples + + :return: a list of parameters, each of which serves to deterministically generate a sample + """ + ... + + @abstractmethod + def sample(self, params): + """ + Extract one sample determined by the given parameters + + :param params: all the necessary parameters to generate a sample + :return: one sample (the same sample has to be generated for the same parameters) + """ + ... + + def __call__(self): with ExitStack() as stack: if self.random_seed is not None: stack.enter_context(qp.util.temp_seed(self.random_seed)) - for index in self._indexes(data): - yield index - - @abstractmethod - def _indexes(self, data: LabelledCollection): - ... + for params in self.samples_parameters(): + yield self.sample(params) -class APP(AbstractProtocol): +class APP(AbstractStochasticSeededProtocol): """ Implementation of the artificial prevalence protocol (APP). 
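    A quick usage sketch (here `data` stands for any LabelledCollection, and the
    parameter values are illustrative only):

        prot = APP(data, sample_size=100, n_prevalences=21, repeats=1, random_seed=42)
        for sample in prot():
            ...  # one LabelledCollection per point of the prevalence grid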
The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., @@ -80,25 +108,22 @@ class APP(AbstractProtocol): [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid combination of prevalence values is indicated by `repeats`. + :param data: a `LabelledCollection` from which the samples will be drawn :param sample_size: integer, number of instances in each sample :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid (default is 21) - :param repeats: number of copies for each valid prevalence vector (default is 1) + :param repeats: number of copies for each valid prevalence vector (default is 10) :param random_seed: allows replicating samples across runs (default None) """ - def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=1, random_seed=None): + def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_seed=None): + super(APP, self).__init__(random_seed) self.data = data self.sample_size = sample_size self.n_prevalences = n_prevalences self.repeats = repeats - self.random_seed = random_seed - def _indexes(self, data: LabelledCollection): - for prevs in self.prevalence_grid(dimensions=data.n_classes): - yield data.sampling_index(self.sample_size, *prevs) - - def prevalence_grid(self, dimensions, return_constrained_dim=False): + def prevalence_grid(self, dimensions): """ Generates vectors of prevalence values from an exhaustive grid of prevalence values. The number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, @@ -110,8 +135,6 @@ class APP(AbstractProtocol): 1). Note that this method is deterministic, i.e., there is no random sampling anywhere. :param dimensions: the number of classes - :param return_constrained_dim: set to True to return all dimensions, or to False (default) for ommitting the - constrained dimension :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found in the grid multiplied by `repeat` @@ -119,43 +142,163 @@ class APP(AbstractProtocol): s = np.linspace(0., 1., self.n_prevalences, endpoint=True) s = [s] * (dimensions - 1) prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1] - if return_constrained_dim: - prevs = [p + (1 - sum(p),) for p in prevs] prevs = np.asarray(prevs).reshape(len(prevs), -1) if self.repeats > 1: prevs = np.repeat(prevs, self.repeats, axis=0) return prevs + def samples_parameters(self): + indexes = [] + for prevs in self.prevalence_grid(dimensions=self.data.n_classes): + index = data.sampling_index(self.sample_size, *prevs) + indexes.append(index) + return indexes -class NPP(AbstractProtocol): + def sample(self, index): + return self.data.sampling_from_index(index) + + def total(self): + return F.num_prevalence_combinations(self.n_prevalences, self.data.n_classes, self.repeats) + + +class NPP(AbstractStochasticSeededProtocol): """ A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. 
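    A quick usage sketch (again, `data` and the parameter values are illustrative only):

        for sample in NPP(data, sample_size=500, repeats=100, random_seed=0)():
            ...  # each sample's prevalence fluctuates around data.prevalence()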
+    :param data: a `LabelledCollection` from which the samples will be drawn
     :param sample_size: integer, the number of instances in each sample
-    :param repeats: the number of samples to generate
+    :param repeats: the number of samples to generate. Default is 100.
+    :param random_seed: allows replicating samples across runs (default None)
     """

-    def __init__(self, sample_size, repeats=1, random_seed=None):
+    def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_seed=None):
+        super(NPP, self).__init__(random_seed)
+        self.data = data
         self.sample_size = sample_size
         self.repeats = repeats
         self.random_seed = random_seed

-    def _indexes(self, data: LabelledCollection):
+    def samples_parameters(self):
+        indexes = []
         for _ in range(self.repeats):
-            yield data.uniform_sampling_index(self.sample_size)
+            index = data.uniform_sampling_index(self.sample_size)
+            indexes.append(index)
+        return indexes
+
+    def sample(self, index):
+        return self.data.sampling_from_index(index)
+
+    def total(self):
+        return self.repeats


-class USimplexPP(AbstractProtocol):
+class USimplexPP(AbstractStochasticSeededProtocol):
+    """
+    A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values,
+    relies on the Kraemer algorithm for sampling the unit (k-1)-simplex uniformly at random, with
+    k the number of classes. This protocol covers the entire range of prevalence values in a
+    statistical sense, i.e., unlike APP there is no guarantee that it is covered precisely
+    equally for all classes, but it is preferred in cases in which the number of possible
+    combinations of the grid values of APP makes this endeavour intractable.
+
+    :param data: a `LabelledCollection` from which the samples will be drawn
+    :param sample_size: integer, the number of instances in each sample
+    :param repeats: the number of samples to generate. Default is 100.
+    :param random_seed: allows replicating samples across runs (default None)
+    """

-    def __init__(self, sample_size, repeats=1, random_seed=None):
+    def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_seed=None):
+        super(USimplexPP, self).__init__(random_seed)
+        self.data = data
         self.sample_size = sample_size
         self.repeats = repeats
         self.random_seed = random_seed

-    def _indexes(self, data: LabelledCollection):
+    def samples_parameters(self):
+        indexes = []
         for prevs in F.uniform_simplex_sampling(n_classes=data.n_classes, size=self.repeats):
-            yield data.sampling_index(self.sample_size, *prevs)
+            index = data.sampling_index(self.sample_size, *prevs)
+            indexes.append(index)
+        return indexes
+
+    def sample(self, index):
+        return self.data.sampling_from_index(index)
+
+    def total(self):
+        return self.repeats
+
+

+class CovariateShiftPP(AbstractStochasticSeededProtocol):
+    """
+    Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.
+
+    :param domainA: a `LabelledCollection` acting as the first domain of the mixture
+    :param domainB: a `LabelledCollection` acting as the second domain of the mixture
+    :param sample_size: integer, the number of instances in each sample
+    :param repeats: the number of samples to generate for each mixture point (default is 1)
+    :param prevalence: the prevalence to preserve across the mixtures. If specified, should be an array containing
+        one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence
+        will be taken from the domain A (default).
+    :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will
+        generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself, i.e.,
+ the specific points + :param random_seed: + """ + + def __init__( + self, + domainA: LabelledCollection, + domainB: LabelledCollection, + sample_size, + repeats=1, + prevalence=None, + mixture_points=11, + random_seed=None): + super(CovariateShiftPP, self).__init__(random_seed) + self.A = domainA + self.B = domainB + self.sample_size = sample_size + self.repeats = repeats + if prevalence is None: + self.prevalence = domainA.prevalence() + else: + self.prevalence = np.asarray(prevalence) + assert len(self.prevalence) == domainA.n_classes, \ + f'wrong shape for the vector prevalence (expected {domainA.n_classes})' + assert F.check_prevalence_vector(self.prevalence), \ + f'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)' + if isinstance(mixture_points, int): + self.mixture_points = np.linspace(0, 1, mixture_points)[::-1] + else: + self.mixture_points = np.asarray(mixture_points) + assert all(np.logical_and(self.mixture_points >= 0, self.mixture_points<=1)), \ + 'mixture_model datatype not understood (expected int or a sequence of real values in [0,1])' + self.random_seed = random_seed + + def samples_parameters(self): + indexesA, indexesB = [], [] + for propA in self.mixture_points: + for _ in range(self.repeats): + nA = int(np.round(self.sample_size * propA)) + nB = self.sample_size-nA + sampleAidx = self.A.sampling_index(nA, *self.prevalence) + sampleBidx = self.B.sampling_index(nB, *self.prevalence) + indexesA.append(sampleAidx) + indexesB.append(sampleBidx) + return list(zip(indexesA, indexesB)) + + def sample(self, indexes): + indexesA, indexesB = indexes + sampleA = self.A.sampling_from_index(indexesA) + sampleB = self.B.sampling_from_index(indexesB) + return sampleA+sampleB + + def total(self): + return self.repeats * len(self.mixture_points) + @@ -163,17 +306,27 @@ if __name__=='__main__': import numpy as np import quapy as qp + # domainA y = [0]*25 + [1]*25 + [2]*25 + [3]*25 - X = [str(i)+'-'+str(yi) for i, yi in enumerate(y)] - + X = ['A:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)] data = LabelledCollection(X, y, classes_=sorted(np.unique(y))) - # p = APP(10, n_prevalences=11, random_seed=42) - # p = NPP(10, repeats=10, random_seed=42) - p = USimplexPP(10, repeats=10, random_seed=42) + # domain B + y = [0]*25 + [1]*25 + [2]*25 + [3]*25 + X = ['B:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)] + dataB = LabelledCollection(X, y, classes_=sorted(np.unique(y))) - for i in p(data): - print(i.instances, i.classes, i.prevalence()) + # p = APP(data, sample_size=10, n_prevalences=11, random_seed=42) + # p = NPP(data, sample_size=10, repeats=10, random_seed=42) + # p = NPP(data, sample_size=10, repeats=10) + # p = USimplexPP(data, sample_size=10, repeats=10) + p = CovariateShiftPP(data, dataB, sample_size=10, mixture_points=11, random_seed=1) + + for _ in range(2): + print('init generator', p.__class__.__name__) + for i in tqdm(p(), total=p.total()): + # print(i) + print(i.instances, i.labels, i.prevalence()) print('done') From 4bc9d196358b6b3722eb245549317721f6fd1fb9 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 25 May 2022 19:14:33 +0200 Subject: [PATCH 05/59] many changes, see change log --- quapy/CHANGE_LOG.txt | 34 +++ quapy/__init__.py | 7 +- quapy/data/base.py | 20 +- quapy/evaluation.py | 102 +++++++++ quapy/method/aggregative.py | 373 +++++++++++++++++---------------- quapy/method/base.py | 88 -------- quapy/method/meta.py | 13 -- quapy/method/neural.py | 2 +- quapy/model_selection.py | 206 ++++++------------ 
 quapy/protocol.py              |  96 +++++----
 quapy/tests/test_evaluation.py |  57 +++++
 quapy/tests/test_hierarchy.py  |  32 +++
 quapy/tests/test_modsel.py     |  77 +++++++
 quapy/tests/test_protocols.py  | 139 ++++++++++++
 14 files changed, 754 insertions(+), 492 deletions(-)
 create mode 100644 quapy/CHANGE_LOG.txt
 create mode 100644 quapy/evaluation.py
 create mode 100644 quapy/tests/test_evaluation.py
 create mode 100644 quapy/tests/test_hierarchy.py
 create mode 100644 quapy/tests/test_modsel.py
 create mode 100644 quapy/tests/test_protocols.py

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
new file mode 100644
index 0000000..a372109
--- /dev/null
+++ b/quapy/CHANGE_LOG.txt
@@ -0,0 +1,34 @@
+# main changes in 0.1.7
+
+- Protocols are now built on an abstraction, AbstractProtocol. There is a new class extending AbstractProtocol called
+  AbstractStochasticSeededProtocol, which implements a seeding policy that allows replicating the series of samplings.
+  There are some examples of protocols: APP, NPP, USimplexPP, CovariateShiftPP (experimental).
+  The idea is to start the sampling by simply calling the __call__ method.
+  This change has a great impact on the framework, since many functions in qp.evaluation, qp.model_selection,
+  and sampling functions in LabelledCollection make use of the old functions.
+
+- ACC, PACC, and Forman's threshold variants have been parallelized.
+
+
+Things to fix:
+- eval budget policy?
+- clean functions like binary, aggregative, probabilistic, etc.; those should be resolved via isinstance()
+- clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only
+  internally and not imposed in any abstract class)
+- optimize "qp.evaluation.prediction" for aggregative methods (pre-classification)
+- update unit tests
+- Policies should be able to set their output to "labelled_collection" or "instances_prevalence" or something similar.
+- Policies should implement the "gen()" one, taking a reader function as an input, and a folder path maybe
+- Review all documentation, redo the Sphinx doc, update Wikis...
+- Resolve the OneVsAll thing (it is in base.py and in aggregative.py)
+- Better handle the environment (e.g., with n_jobs)
+- test cross_generate_predictions and cancel cross_generate_predictions_depr
+- Add a proper log?
+- test LoadSamplesFromDirectory (in protocols.py)
+- improve plots?
+- I have removed the distinction between "classify" and "posterior_probabilities" in the Aggregative quantifiers,
+  so that probabilistic classifiers actually return posterior probabilities, while non-probabilistic quantifiers
+  instead return crisp decisions. The idea was to unify the quantification function (i.e., now it is always
+  classify & aggregate, irrespective of the class). However, this has caused a problem with OneVsAll. This has to
+  be checked, since it is now unnecessarily complicated (it also has old references to .probabilistic, and all this
+  stuff).
\ No newline at end of file
diff --git a/quapy/__init__.py b/quapy/__init__.py
index ad69ae9..2ef4c5c 100644
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -2,13 +2,13 @@ from . import error
 from . import data
 from quapy.data import datasets
 from . import functional
-from . import method
+# from . import method
 from . import evaluation
+from . import protocol
 from . import plot
 from . import util
 from . import model_selection
 from . 
import classification -from quapy.method.base import isprobabilistic, isaggregative __version__ = '0.1.7' @@ -21,5 +21,4 @@ environ = { 'SVMPERF_HOME': './svm_perf_quantification' } -def isbinary(x): - return x.binary \ No newline at end of file + diff --git a/quapy/data/base.py b/quapy/data/base.py index cfe2891..c555692 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -210,10 +210,12 @@ class LabelledCollection: :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the second one with `1-train_prop` elements """ - tr_docs, te_docs, tr_labels, te_labels = \ - train_test_split(self.instances, self.labels, train_size=train_prop, stratify=self.labels, - random_state=random_state) - return LabelledCollection(tr_docs, tr_labels), LabelledCollection(te_docs, te_labels) + tr_docs, te_docs, tr_labels, te_labels = train_test_split( + self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state + ) + training = LabelledCollection(tr_docs, tr_labels, classes_=self.classes_) + test = LabelledCollection(te_docs, te_labels, classes_=self.classes_) + return training, test def __add__(self, other): """ @@ -418,13 +420,3 @@ class Dataset: yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})') -def isbinary(data): - """ - Returns True if `data` is either a binary :class:`Dataset` or a binary :class:`LabelledCollection` - - :param data: a :class:`Dataset` or a :class:`LabelledCollection` object - :return: True if labelled according to two classes - """ - if isinstance(data, Dataset) or isinstance(data, LabelledCollection): - return data.binary - return False diff --git a/quapy/evaluation.py b/quapy/evaluation.py new file mode 100644 index 0000000..0ea417d --- /dev/null +++ b/quapy/evaluation.py @@ -0,0 +1,102 @@ +from typing import Union, Callable, Iterable +import numpy as np +from tqdm import tqdm +import inspect +import quapy as qp +from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol +from quapy.data import LabelledCollection +from quapy.method.base import BaseQuantifier +from quapy.util import temp_seed +import quapy.functional as F +import pandas as pd + + +def prediction(model: BaseQuantifier, protocol: AbstractProtocol, verbose=False): + sout = lambda x: print(x) if verbose else None + from method.aggregative import AggregativeQuantifier + if isinstance(model, AggregativeQuantifier) and isinstance(protocol, OnLabelledCollectionProtocol): + sout('speeding up the prediction for the aggregative quantifier') + pre_classified = model.classify(protocol.get_labelled_collection().instances) + return __prediction_helper(model.aggregate, protocol.on_preclassified_instances(pre_classified), verbose) + else: + sout(f'the method is not aggregative, or the protocol is not an instance of ' + f'{OnLabelledCollectionProtocol.__name__}, so no optimization can be carried out') + return __prediction_helper(model.quantify, protocol, verbose) + + +def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=False): + true_prevs, estim_prevs = [], [] + for sample in tqdm(protocol(), total=protocol.total()) if verbose else protocol(): + estim_prevs.append(quantification_fn(sample.instances)) + true_prevs.append(sample.prevalence()) + + true_prevs = np.asarray(true_prevs) + estim_prevs = np.asarray(estim_prevs) + + return true_prevs, estim_prevs + + +def evaluation_report(model: BaseQuantifier, + protocol: AbstractProtocol, + 
error_metrics:Iterable[Union[str,Callable]]='mae', + verbose=False): + + true_prevs, estim_prevs = prediction(model, protocol, verbose) + return _prevalence_report(true_prevs, estim_prevs, error_metrics) + + +def _prevalence_report(true_prevs, estim_prevs, error_metrics: Iterable[Union[str, Callable]] = 'mae'): + + if isinstance(error_metrics, str): + error_metrics = [error_metrics] + + error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics] + assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions' + error_names = [e.__name__ for e in error_funcs] + + df = pd.DataFrame(columns=['true-prev', 'estim-prev'] + error_names) + for true_prev, estim_prev in zip(true_prevs, estim_prevs): + series = {'true-prev': true_prev, 'estim-prev': estim_prev} + for error_name, error_metric in zip(error_names, error_funcs): + score = error_metric(true_prev, estim_prev) + series[error_name] = score + df = df.append(series, ignore_index=True) + + return df + + +def evaluate(model: BaseQuantifier, protocol: AbstractProtocol, error_metric:Union[str, Callable], verbose=False): + if isinstance(error_metric, str): + error_metric = qp.error.from_name(error_metric) + true_prevs, estim_prevs = prediction(model, protocol, verbose) + return error_metric(true_prevs, estim_prevs) + + + +def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, repeats=1, verbose=False): + if n_prevpoints is None and eval_budget is None: + raise ValueError('either n_prevpoints or eval_budget has to be specified') + elif n_prevpoints is None: + assert eval_budget > 0, 'eval_budget must be a positive integer' + n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats) + eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) + if verbose: + print(f'setting n_prevpoints={n_prevpoints} so that the number of ' + f'evaluations ({eval_computations}) does not exceed the evaluation ' + f'budget ({eval_budget})') + elif eval_budget is None: + eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) + if verbose: + print(f'{eval_computations} evaluations will be performed for each ' + f'combination of hyper-parameters') + else: + eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) + if eval_computations > eval_budget: + n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats) + new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats) + if verbose: + print(f'the budget of evaluations would be exceeded with the requested ' + f'n_prevpoints. Changing to n_prevpoints={n_prevpoints}. 
This will produce ' + f'{new_eval_computations} evaluation computations for each hyper-parameter combination.') + return n_prevpoints, eval_computations + diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index c0280a2..ea9cbc0 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1,15 +1,13 @@ from abc import abstractmethod from copy import deepcopy from typing import Union - import numpy as np from joblib import Parallel, delayed from sklearn.base import BaseEstimator from sklearn.calibration import CalibratedClassifierCV from sklearn.metrics import confusion_matrix -from sklearn.model_selection import StratifiedKFold +from sklearn.model_selection import StratifiedKFold, cross_val_predict from tqdm import tqdm - import quapy as qp import quapy.functional as F from quapy.classification.svmperf import SVMperf @@ -61,7 +59,9 @@ class AggregativeQuantifier(BaseQuantifier): def classify(self, instances): """ - Provides the label predictions for the given instances. + Provides the label predictions for the given instances. The predictions should respect the format expected by + :meth:`aggregate`, i.e., posterior probabilities for probabilistic quantifiers, or crisp predictions for + non-probabilistic quantifiers :param instances: array-like :return: np.ndarray of shape `(n_instances,)` with label predictions @@ -118,16 +118,6 @@ class AggregativeQuantifier(BaseQuantifier): """ return self.learner.classes_ - @property - def aggregative(self): - """ - Returns True, indicating the quantifier is of type aggregative. - - :return: True - """ - - return True - class AggregativeProbabilisticQuantifier(AggregativeQuantifier): """ @@ -137,28 +127,25 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): probabilities. """ - def posterior_probabilities(self, instances): + def classify(self, instances): return self.learner.predict_proba(instances) - def predict_proba(self, instances): - return self.posterior_probabilities(instances) - - def quantify(self, instances): - classif_posteriors = self.posterior_probabilities(instances) - return self.aggregate(classif_posteriors) - def set_params(self, **parameters): if isinstance(self.learner, CalibratedClassifierCV): parameters = {'base_estimator__' + k: v for k, v in parameters.items()} self.learner.set_params(**parameters) - @property - def probabilistic(self): - return True - # Helper # ------------------------------------ +def _ensure_probabilistic(learner): + if not hasattr(learner, 'predict_proba'): + print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. ' + f'The learner will be calibrated.') + learner = CalibratedClassifierCV(learner, cv=5) + return learner + + def _training_helper(learner, data: LabelledCollection, fit_learner: bool = True, @@ -180,10 +167,7 @@ def _training_helper(learner, """ if fit_learner: if ensure_probabilistic: - if not hasattr(learner, 'predict_proba'): - print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. 
' - f'The learner will be calibrated.') - learner = CalibratedClassifierCV(learner, cv=5) + learner = _ensure_probabilistic(learner) if val_split is not None: if isinstance(val_split, float): if not (0 < val_split < 1): @@ -214,6 +198,89 @@ def _training_helper(learner, return learner, unused +def cross_generate_predictions( + data, + learner, + val_split, + probabilistic, + fit_learner, + n_jobs +): + + if isinstance(val_split, int): + assert fit_learner == True, \ + 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' + + if probabilistic: + learner = _ensure_probabilistic(learner) + predict = 'predict_proba' + else: + predict = 'predict' + y_pred = cross_val_predict(learner, *data.Xy, cv=val_split, n_jobs=n_jobs, method=predict) + class_count = data.counts() + + # fit the learner on all data + learner.fit(*data.Xy) + classes = data.classes_ + else: + learner, val_data = _training_helper( + learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=val_split + ) + y_pred = learner.predict_proba(val_data.instances) if probabilistic else learner.predict(val_data.instances) + y = val_data.labels + classes = val_data.classes_ + class_count = val_data.counts() + + return learner, y, y_pred, classes, class_count + + +def cross_generate_predictions_depr( + data, + learner, + val_split, + probabilistic, + fit_learner, + method_name='' +): + predict = learner.predict_proba if probabilistic else learner.predict + if isinstance(val_split, int): + assert fit_learner == True, \ + 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' + # kFCV estimation of parameters + y, y_ = [], [] + kfcv = StratifiedKFold(n_splits=val_split) + pbar = tqdm(kfcv.split(*data.Xy), total=val_split) + for k, (training_idx, validation_idx) in enumerate(pbar): + pbar.set_description(f'{method_name}\tfitting fold {k}') + training = data.sampling_from_index(training_idx) + validation = data.sampling_from_index(validation_idx) + learner, val_data = _training_helper( + learner, training, fit_learner, ensure_probabilistic=probabilistic, val_split=validation + ) + y_.append(predict(val_data.instances)) + y.append(val_data.labels) + + y = np.concatenate(y) + y_ = np.concatenate(y_) + class_count = data.counts() + + # fit the learner on all data + learner, _ = _training_helper( + learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=None + ) + classes = data.classes_ + + else: + learner, val_data = _training_helper( + learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=val_split + ) + y_ = predict(val_data.instances) + y = val_data.labels + classes = val_data.classes_ + class_count = val_data.counts() + + return learner, y, y_, classes, class_count + # Methods # ------------------------------------ class CC(AggregativeQuantifier): @@ -264,9 +331,10 @@ class ACC(AggregativeQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): self.learner = learner self.val_split = val_split + self.n_jobs = n_jobs def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -280,44 +348,33 @@ class ACC(AggregativeQuantifier): cross validation to estimate the parameters :return: self """ + if val_split is None: val_split = self.val_split - if isinstance(val_split, int): - assert fit_learner == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' - # kFCV estimation of parameters - y, y_ = [], [] - kfcv = StratifiedKFold(n_splits=val_split) - pbar = tqdm(kfcv.split(*data.Xy), total=val_split) - for k, (training_idx, validation_idx) in enumerate(pbar): - pbar.set_description(f'{self.__class__.__name__} fitting fold {k}') - training = data.sampling_from_index(training_idx) - validation = data.sampling_from_index(validation_idx) - learner, val_data = _training_helper(self.learner, training, fit_learner, val_split=validation) - y_.append(learner.predict(val_data.instances)) - y.append(val_data.labels) - y = np.concatenate(y) - y_ = np.concatenate(y_) - class_count = data.counts() - - # fit the learner on all data - self.learner, _ = _training_helper(self.learner, data, fit_learner, val_split=None) - - else: - self.learner, val_data = _training_helper(self.learner, data, fit_learner, val_split=val_split) - y_ = self.learner.predict(val_data.instances) - y = val_data.labels - class_count = val_data.counts() + self.learner, y, y_, classes, class_count = cross_generate_predictions( + data, self.learner, val_split, probabilistic=False, fit_learner=fit_learner, n_jobs=self.n_jobs + ) self.cc = CC(self.learner) - - # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a - # document that belongs to yj ends up being classified as belonging to yi - self.Pte_cond_estim_ = confusion_matrix(y, y_).T / class_count + self.Pte_cond_estim_ = self.getPteCondEstim(data.classes_, y, y_) return self + @classmethod + def getPteCondEstim(cls, classes, y, y_): + # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a + # document that belongs to yj ends up being classified as belonging to yi + conf = confusion_matrix(y, y_, labels=classes).T + conf = conf.astype(np.float) + class_counts = conf.sum(axis=0) + for i, _ in enumerate(classes): + if class_counts[i] == 0: + conf[i, i] = 1 + else: + conf[:, i] /= class_counts[i] + return conf + def classify(self, data): return self.cc.classify(data) @@ -380,9 +437,10 @@ class PACC(AggregativeProbabilisticQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): self.learner = learner self.val_split = val_split + self.n_jobs = n_jobs def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -396,52 +454,31 @@ class PACC(AggregativeProbabilisticQuantifier): to estimate the parameters :return: self """ + if val_split is None: val_split = self.val_split - if isinstance(val_split, int): - assert fit_learner == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' - # kFCV estimation of parameters - y, y_ = [], [] - kfcv = StratifiedKFold(n_splits=val_split) - pbar = tqdm(kfcv.split(*data.Xy), total=val_split) - for k, (training_idx, validation_idx) in enumerate(pbar): - pbar.set_description(f'{self.__class__.__name__} fitting fold {k}') - training = data.sampling_from_index(training_idx) - validation = data.sampling_from_index(validation_idx) - learner, val_data = _training_helper( - self.learner, training, fit_learner, ensure_probabilistic=True, val_split=validation) - y_.append(learner.predict_proba(val_data.instances)) - y.append(val_data.labels) - - y = np.concatenate(y) - y_ = np.vstack(y_) - - # fit the learner on all data - self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True, - val_split=None) - classes = data.classes_ - - else: - self.learner, val_data = _training_helper( - self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) - y_ = self.learner.predict_proba(val_data.instances) - y = val_data.labels - classes = val_data.classes_ + self.learner, y, y_, classes, class_count = cross_generate_predictions( + data, self.learner, val_split, probabilistic=True, fit_learner=fit_learner, n_jobs=self.n_jobs + ) self.pcc = PCC(self.learner) + self.Pte_cond_estim_ = self.getPteCondEstim(classes, y, y_) + return self + + @classmethod + def getPteCondEstim(cls, classes, y, y_): # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a # document that belongs to yj ends up being classified as belonging to yi n_classes = len(classes) - confusion = np.empty(shape=(n_classes, n_classes)) + confusion = np.eye(n_classes) for i, class_ in enumerate(classes): - confusion[i] = y_[y == class_].mean(axis=0) + idx = y == class_ + if idx.any(): + confusion[i] = y_[idx].mean(axis=0) - self.Pte_cond_estim_ = confusion.T - - return self + return confusion.T def aggregate(self, classif_posteriors): prevs_estim = self.pcc.aggregate(classif_posteriors) @@ -557,7 +594,7 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): self._check_binary(data, self.__class__.__name__) self.learner, validation = _training_helper( self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) - Px = self.posterior_probabilities(validation.instances)[:, 1] # takes only the P(y=+1|x) + Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] # pre-compute the histogram for positive and negative examples @@ -732,44 +769,24 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): self.learner = learner self.val_split = val_split + self.n_jobs = n_jobs def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): self._check_binary(data, "Threshold Optimization") if val_split is None: val_split = self.val_split - if isinstance(val_split, int): - assert fit_learner == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' - # kFCV estimation of parameters - y, probabilities = [], [] - kfcv = StratifiedKFold(n_splits=val_split) - pbar = tqdm(kfcv.split(*data.Xy), total=val_split) - for k, (training_idx, validation_idx) in enumerate(pbar): - pbar.set_description(f'{self.__class__.__name__} fitting fold {k}') - training = data.sampling_from_index(training_idx) - validation = data.sampling_from_index(validation_idx) - learner, val_data = _training_helper(self.learner, training, fit_learner, val_split=validation) - probabilities.append(learner.predict_proba(val_data.instances)) - y.append(val_data.labels) - y = np.concatenate(y) - probabilities = np.concatenate(probabilities) - - # fit the learner on all data - self.learner, _ = _training_helper(self.learner, data, fit_learner, val_split=None) - - else: - self.learner, val_data = _training_helper(self.learner, data, fit_learner, val_split=val_split) - probabilities = self.learner.predict_proba(val_data.instances) - y = val_data.labels + self.learner, y, y_, classes, class_count = cross_generate_predictions( + data, self.learner, val_split, probabilistic=True, fit_learner=fit_learner, n_jobs=self.n_jobs + ) self.cc = CC(self.learner) - self.tpr, self.fpr = self._optimize_threshold(y, probabilities) + self.tpr, self.fpr = self._optimize_threshold(y, y_) return self @@ -828,7 +845,7 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): def _compute_tpr(self, TP, FP): if TP + FP == 0: - return 0 + return 1 return TP / (TP + FP) def _compute_fpr(self, FP, TN): @@ -1022,54 +1039,59 @@ class OneVsAll(AggregativeQuantifier): def classify(self, instances): """ - Returns a matrix of shape `(n,m,)` with `n` the number of instances and `m` the number of classes. The entry - `(i,j)` is a binary value indicating whether instance `i `belongs to class `j`. The binary classifications are - independent of each other, meaning that an instance can end up be attributed to 0, 1, or more classes. + If the base quantifier is not probabilistic, returns a matrix of shape `(n,m,)` with `n` the number of + instances and `m` the number of classes. The entry `(i,j)` is a binary value indicating whether instance + `i `belongs to class `j`. The binary classifications are independent of each other, meaning that an instance + can end up be attributed to 0, 1, or more classes. + If the base quantifier is probabilistic, returns a matrix of shape `(n,m,2)` with `n` the number of instances + and `m` the number of classes. The entry `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the + posterior probability that instance `i` belongs (resp. does not belong) to class `j`. The posterior + probabilities are independent of each other, meaning that, in general, they do not sum up to one. 
:param instances: array-like :return: `np.ndarray` """ - classif_predictions_bin = self.__parallel(self._delayed_binary_classification, instances) - return classif_predictions_bin.T - - def posterior_probabilities(self, instances): - """ - Returns a matrix of shape `(n,m,2)` with `n` the number of instances and `m` the number of classes. The entry - `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the posterior probability that instance `i` belongs - (resp. does not belong) to class `j`. - The posterior probabilities are independent of each other, meaning that, in general, they do not sum - up to one. - - :param instances: array-like - :return: `np.ndarray` - """ - - if not self.binary_quantifier.probabilistic: - raise NotImplementedError(f'{self.__class__.__name__} does not implement posterior_probabilities because ' - f'the base quantifier {self.binary_quantifier.__class__.__name__} is not ' - f'probabilistic') - posterior_predictions_bin = self.__parallel(self._delayed_binary_posteriors, instances) - return np.swapaxes(posterior_predictions_bin, 0, 1) - - def aggregate(self, classif_predictions_bin): - if self.probabilistic: - assert classif_predictions_bin.shape[1] == self.n_classes and classif_predictions_bin.shape[2] == 2, \ - 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \ - 'probabilities (2 dimensions) for each document (row) and class (columns)' + classif_predictions = self.__parallel(self._delayed_binary_classification, instances) + if isinstance(self.binary_quantifier, AggregativeProbabilisticQuantifier): + return np.swapaxes(classif_predictions, 0, 1) else: - assert set(np.unique(classif_predictions_bin)).issubset({0, 1}), \ - 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \ - 'predictions for each document (row) and class (columns)' - prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin) + return classif_predictions.T + # + # def posterior_probabilities(self, instances): + # """ + # Returns a matrix of shape `(n,m,2)` with `n` the number of instances and `m` the number of classes. The entry + # `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the posterior probability that instance `i` belongs + # (resp. does not belong) to class `j`. + # The posterior probabilities are independent of each other, meaning that, in general, they do not sum + # up to one. 
+ # + # :param instances: array-like + # :return: `np.ndarray` + # """ + # + # if not isinstance(self.binary_quantifier, AggregativeProbabilisticQuantifier): + # raise NotImplementedError(f'{self.__class__.__name__} does not implement posterior_probabilities because ' + # f'the base quantifier {self.binary_quantifier.__class__.__name__} is not ' + # f'probabilistic') + # posterior_predictions_bin = self.__parallel(self._delayed_binary_posteriors, instances) + # return np.swapaxes(posterior_predictions_bin, 0, 1) + + def aggregate(self, classif_predictions): + # if self.probabilistic: + # assert classif_predictions.shape[1] == self.n_classes and classif_predictions.shape[2] == 2, \ + # 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \ + # 'probabilities (2 dimensions) for each document (row) and class (columns)' + # else: + # assert set(np.unique(classif_predictions)).issubset({0, 1}), \ + # 'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \ + # 'predictions for each document (row) and class (columns)' + prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions) return F.normalize_prevalence(prevalences) - def quantify(self, X): - if self.probabilistic: - predictions = self.posterior_probabilities(X) - else: - predictions = self.classify(X) - return self.aggregate(predictions) + # def quantify(self, X): + # predictions = self.classify(X) + # return self.aggregate(predictions) def __parallel(self, func, *args, **kwargs): return np.asarray( @@ -1093,9 +1115,6 @@ class OneVsAll(AggregativeQuantifier): def _delayed_binary_classification(self, c, X): return self.dict_binary_quantifiers[c].classify(X) - def _delayed_binary_posteriors(self, c, X): - return self.dict_binary_quantifiers[c].posterior_probabilities(X) - def _delayed_binary_aggregate(self, c, classif_predictions): # the estimation for the positive class prevalence return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1] @@ -1104,21 +1123,3 @@ class OneVsAll(AggregativeQuantifier): bindata = LabelledCollection(data.instances, data.labels == c, classes_=[False, True]) self.dict_binary_quantifiers[c].fit(bindata) - @property - def binary(self): - """ - Informs that the classifier is not binary - - :return: False - """ - return False - - @property - def probabilistic(self): - """ - Indicates if the classifier is probabilistic or not (depending on the nature of the base classifier). - - :return: boolean - """ - - return self.binary_quantifier.probabilistic diff --git a/quapy/method/base.py b/quapy/method/base.py index 4a4962a..55e18c7 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -51,56 +51,6 @@ class BaseQuantifier(metaclass=ABCMeta): """ ... - @property - @abstractmethod - def classes_(self): - """ - Class labels, in the same order in which class prevalence values are to be computed. - - :return: array-like - """ - ... - - @property - def n_classes(self): - """ - Returns the number of classes - - :return: integer - """ - return len(self.classes_) - - # these methods allows meta-learners to reimplement the decision based on their constituents, and not - # based on class structure - @property - def binary(self): - """ - Indicates whether the quantifier is binary or not. 
- - :return: False (to be overridden) - """ - return False - - @property - def aggregative(self): - """ - Indicates whether the quantifier is of type aggregative or not - - :return: False (to be overridden) - """ - - return False - - @property - def probabilistic(self): - """ - Indicates whether the quantifier is of type probabilistic or not - - :return: False (to be overridden) - """ - - return False - class BinaryQuantifier(BaseQuantifier): """ @@ -112,46 +62,8 @@ class BinaryQuantifier(BaseQuantifier): assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \ f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.' - @property - def binary(self): - """ - Informs that the quantifier is binary - - :return: True - """ - return True -def isbinary(model:BaseQuantifier): - """ - Alias for property `binary` - - :param model: the model - :return: True if the model is binary, False otherwise - """ - return model.binary - - -def isaggregative(model:BaseQuantifier): - """ - Alias for property `aggregative` - - :param model: the model - :return: True if the model is aggregative, False otherwise - """ - - return model.aggregative - - -def isprobabilistic(model:BaseQuantifier): - """ - Alias for property `probabilistic` - - :param model: the model - :return: True if the model is probabilistic, False otherwise - """ - - return model.probabilistic # class OneVsAll: diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 3504301..3e57652 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -234,19 +234,6 @@ class Ensemble(BaseQuantifier): order = np.argsort(dist) return _select_k(predictions, order, k=self.red_size) - @property - def classes_(self): - return self.base_quantifier.classes_ - - @property - def binary(self): - """ - Returns a boolean indicating whether the base quantifiers are binary or not - - :return: boolean - """ - return self.base_quantifier.binary - @property def aggregative(self): """ diff --git a/quapy/method/neural.py b/quapy/method/neural.py index bf1f375..0665634 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -191,7 +191,7 @@ class QuaNetTrainer(BaseQuantifier): label_predictions = np.argmax(posteriors, axis=-1) prevs_estim = [] for quantifier in self.quantifiers.values(): - predictions = posteriors if quantifier.probabilistic else label_predictions + predictions = posteriors if isinstance(quantifier, AggregativeProbabilisticQuantifier) else label_predictions prevs_estim.extend(quantifier.aggregate(predictions)) # there is no real need for adding static estims like the TPR or FPR from training since those are constant diff --git a/quapy/model_selection.py b/quapy/model_selection.py index eef811b..c1fa817 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -2,14 +2,12 @@ import itertools import signal from copy import deepcopy from typing import Union, Callable - -import numpy as np - +import evaluation import quapy as qp +from protocol import AbstractProtocol, OnLabelledCollectionProtocol from quapy.data.base import LabelledCollection -from quapy.evaluation import artificial_prevalence_prediction, natural_prevalence_prediction, gen_prevalence_prediction from quapy.method.aggregative import BaseQuantifier -import inspect +from time import time class GridSearchQ(BaseQuantifier): @@ -21,33 +19,11 @@ class GridSearchQ(BaseQuantifier): :param model: the quantifier to optimize :type model: BaseQuantifier :param param_grid: a dictionary with keys the parameter names and 
values the list of values to explore - :param sample_size: the size of the samples to extract from the validation set (ignored if protocol='gen') - :param protocol: either 'app' for the artificial prevalence protocol, 'npp' for the natural prevalence - protocol, or 'gen' for using a custom sampling generator function - :param n_prevpoints: if specified, indicates the number of equally distant points to extract from the interval - [0,1] in order to define the prevalences of the samples; e.g., if n_prevpoints=5, then the prevalences for - each class will be explored in [0.00, 0.25, 0.50, 0.75, 1.00]. If not specified, then eval_budget is requested. - Ignored if protocol!='app'. - :param n_repetitions: the number of repetitions for each combination of prevalences. This parameter is ignored - for the protocol='app' if eval_budget is set and is lower than the number of combinations that would be - generated using the value assigned to n_prevpoints (for the current number of classes and n_repetitions). - Ignored for protocol='npp' and protocol='gen' (use eval_budget for setting a maximum number of samples in - those cases). - :param eval_budget: if specified, sets a ceil on the number of evaluations to perform for each hyper-parameter - combination. For example, if protocol='app', there are 3 classes, n_repetitions=1 and eval_budget=20, then - n_prevpoints will be set to 5, since this will generate 15 different prevalences, i.e., [0, 0, 1], - [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0], and since setting it to 6 would generate more than - 20. When protocol='gen', indicates the maximum number of samples to generate, but less samples will be - generated if the generator yields less samples. + :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol` :param error: an error function (callable) or a string indicating the name of an error function (valid ones are those in qp.error.QUANTIFICATION_ERROR) :param refit: whether or not to refit the model on the whole labelled collection (training+validation) with the best chosen hyperparameter combination. Ignored if the protocol does not implement the :class:`quapy.protocol.OnLabelledCollectionProtocol` interface - :param val_split: either a LabelledCollection on which to test the performance of the different settings, or - a float in [0,1] indicating the proportion of labelled data to extract from the training set, or a callable - returning a generator function each time it is invoked (only for protocol='gen'). - :param n_jobs: number of parallel jobs - :param random_seed: set the seed of the random generator to replicate experiments. Ignored if protocol='gen'. :param timeout: establishes a timer (in seconds) for each of the hyperparameter configurations being tested. Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set. 
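For reference, a minimal usage sketch of the refactored, protocol-based interface (it mirrors quapy/tests/test_modsel.py further below; the dataset, the C grid, and the sample size are illustrative only, and the package-qualified import paths are assumed to resolve):

    import numpy as np
    import quapy as qp
    from quapy.method.aggregative import PACC
    from quapy.model_selection import GridSearchQ
    from quapy.protocol import APP
    from sklearn.linear_model import LogisticRegression

    data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10)
    training, validation = data.training.split_stratified(0.7, random_state=1)

    # the protocol object replaces the old sample_size/n_prevpoints/eval_budget arguments
    app = APP(validation, sample_size=100, random_seed=1)

    q = GridSearchQ(
        PACC(LogisticRegression()),
        param_grid={'C': np.logspace(-3, 3, 7)},
        protocol=app, error='mae', refit=True, timeout=-1, verbose=True
    ).fit(training)
    print('best params', q.best_params_)
    print('best score', q.best_score_)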
@@ -57,65 +33,27 @@ class GridSearchQ(BaseQuantifier): def __init__(self, model: BaseQuantifier, param_grid: dict, - sample_size: Union[int, None], - protocol='app', - n_prevpoints: int = None, - n_repetitions: int = 1, - eval_budget: int = None, + protocol: AbstractProtocol, error: Union[Callable, str] = qp.error.mae, refit=True, - val_split=0.4, - n_jobs=1, - random_seed=42, timeout=-1, + n_jobs=1, verbose=False): self.model = model self.param_grid = param_grid - self.sample_size = sample_size - self.protocol = protocol.lower() - self.n_prevpoints = n_prevpoints - self.n_repetitions = n_repetitions - self.eval_budget = eval_budget + self.protocol = protocol self.refit = refit - self.val_split = val_split - self.n_jobs = n_jobs - self.random_seed = random_seed self.timeout = timeout + self.n_jobs = n_jobs self.verbose = verbose self.__check_error(error) - assert self.protocol in {'app', 'npp', 'gen'}, \ - 'unknown protocol: valid ones are "app" or "npp" for the "artificial" or the "natural" prevalence ' \ - 'protocols. Use protocol="gen" when passing a generator function thorough val_split that yields a ' \ - 'sample (instances) and their prevalence (ndarray) at each iteration.' - assert self.eval_budget is None or isinstance(self.eval_budget, int) - if self.protocol in ['npp', 'gen']: - if self.protocol=='npp' and (self.eval_budget is None or self.eval_budget <= 0): - raise ValueError(f'when protocol="npp" the parameter eval_budget should be ' - f'indicated (and should be >0).') - if self.n_repetitions != 1: - print('[warning] n_repetitions has been set and will be ignored for the selected protocol') + assert isinstance(protocol, AbstractProtocol), 'unknown protocol' def _sout(self, msg): if self.verbose: print(f'[{self.__class__.__name__}]: {msg}') - def __check_training_validation(self, training, validation): - if isinstance(validation, LabelledCollection): - return training, validation - elif isinstance(validation, float): - assert 0. < validation < 1., 'validation proportion should be in (0,1)' - training, validation = training.split_stratified(train_prop=1 - validation) - return training, validation - elif self.protocol=='gen' and inspect.isgenerator(validation()): - return training, validation - else: - raise ValueError(f'"validation" must either be a LabelledCollection or a float in (0,1) indicating the' - f'proportion of training documents to extract (type found: {type(validation)}). 
' - f'Optionally, "validation" can be a callable function returning a generator that yields ' - f'the sample instances along with their true prevalence at each iteration by ' - f'setting protocol="gen".') - def __check_error(self, error): if error in qp.error.QUANTIFICATION_ERROR: self.error = error @@ -127,96 +65,86 @@ class GridSearchQ(BaseQuantifier): raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n' f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}') - def __generate_predictions(self, model, val_split): - commons = { - 'n_repetitions': self.n_repetitions, - 'n_jobs': self.n_jobs, - 'random_seed': self.random_seed, - 'verbose': False - } - if self.protocol == 'app': - return artificial_prevalence_prediction( - model, val_split, self.sample_size, - n_prevpoints=self.n_prevpoints, - eval_budget=self.eval_budget, - **commons - ) - elif self.protocol == 'npp': - return natural_prevalence_prediction( - model, val_split, self.sample_size, - **commons) - elif self.protocol == 'gen': - return gen_prevalence_prediction(model, gen_fn=val_split, eval_budget=self.eval_budget) - else: - raise ValueError('unknown protocol') - - def fit(self, training: LabelledCollection, val_split: Union[LabelledCollection, float, Callable] = None): + def fit(self, training: LabelledCollection): """ Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing the error metric. :param training: the training set on which to optimize the hyperparameters - :param val_split: either a LabelledCollection on which to test the performance of the different settings, or - a float in [0,1] indicating the proportion of labelled data to extract from the training set :return: self """ - if val_split is None: - val_split = self.val_split - training, val_split = self.__check_training_validation(training, val_split) - if self.protocol != 'gen': - assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer' - params_keys = list(self.param_grid.keys()) params_values = list(self.param_grid.values()) - model = self.model - - if self.timeout > 0: - def handler(signum, frame): - self._sout('timeout reached') - raise TimeoutError() - - signal.signal(signal.SIGALRM, handler) + protocol = self.protocol + n_jobs = self.n_jobs self.param_scores_ = {} self.best_score_ = None - some_timeouts = False - for values in itertools.product(*params_values): - params = dict({k: values[i] for i, k in enumerate(params_keys)}) - if self.timeout > 0: - signal.alarm(self.timeout) + hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)] + scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), n_jobs=n_jobs) - try: - # overrides default parameters with the parameters being explored at this iteration - model.set_params(**params) - model.fit(training) - true_prevalences, estim_prevalences = self.__generate_predictions(model, val_split) - score = self.error(true_prevalences, estim_prevalences) - - self._sout(f'checking hyperparams={params} got {self.error.__name__} score {score:.5f}') + for params, score, model in scores: + if score is not None: if self.best_score_ is None or score < self.best_score_: self.best_score_ = score self.best_params_ = params - self.best_model_ = deepcopy(model) + self.best_model_ = model self.param_scores_[str(params)] = score + else: + self.param_scores_[str(params)] = 'timeout' - if 
self.timeout > 0: - signal.alarm(0) - except TimeoutError: - print(f'timeout reached for config {params}') - some_timeouts = True - - if self.best_score_ is None and some_timeouts: + if self.best_score_ is None: raise TimeoutError('all jobs took more than the timeout time to end') self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f})') if self.refit: - self._sout(f'refitting on the whole development set') - self.best_model_.fit(training + val_split) + if isinstance(protocol, OnLabelledCollectionProtocol): + self._sout(f'refitting on the whole development set') + self.best_model_.fit(training + protocol.get_labelled_collection()) + else: + raise RuntimeWarning(f'"refit" was requested, but the protocol does not ' + f'implement the {OnLabelledCollectionProtocol.__name__} interface') return self + def _delayed_eval(self, args): + params, training = args + + protocol = self.protocol + error = self.error + + if self.timeout > 0: + def handler(signum, frame): + raise TimeoutError() + + signal.signal(signal.SIGALRM, handler) + + tinit = time() + + if self.timeout > 0: + signal.alarm(self.timeout) + + try: + model = deepcopy(self.model) + # overrides default parameters with the parameters being explored at this iteration + model.set_params(**params) + model.fit(training) + score = evaluation.evaluate(model, protocol=protocol, error_metric=error) + + ttime = time()-tinit + self._sout(f'hyperparams={params}\t got {error.__name__} score {score:.5f} [took {ttime:.4f}s]') + + if self.timeout > 0: + signal.alarm(0) + except TimeoutError: + self._sout(f'timeout ({self.timeout}s) reached for config {params}') + score = None + + return params, score, model + + def quantify(self, instances): """Estimate class prevalence values using the best model found after calling the :meth:`fit` method. @@ -227,14 +155,6 @@ class GridSearchQ(BaseQuantifier): assert hasattr(self, 'best_model_'), 'quantify called before fit' return self.best_model().quantify(instances) - @property - def classes_(self): - """ - Classes on which the quantifier has been trained on. - :return: a ndarray of shape `(n_classes)` with the class identifiers - """ - return self.best_model().classes_ - def set_params(self, **parameters): """Sets the hyper-parameters to explore. @@ -260,3 +180,5 @@ class GridSearchQ(BaseQuantifier): if hasattr(self, 'best_model_'): return self.best_model_ raise ValueError('best_model called before fit') + + diff --git a/quapy/protocol.py b/quapy/protocol.py index 43bb0ef..70a98d9 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -1,12 +1,16 @@ +from copy import deepcopy + +import quapy as qp import numpy as np import itertools from collections.abc import Generator from contextlib import ExitStack from abc import ABCMeta, abstractmethod - from quapy.data import LabelledCollection import quapy.functional as F from tqdm import tqdm +from os.path import exists +from glob import glob # 0.1.7 @@ -61,6 +65,8 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): the sequence will be different every time the protocol is called. """ + _random_seed = -1 # means "not set" + def __init__(self, seed=None): self.random_seed = seed @@ -93,13 +99,47 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): def __call__(self): with ExitStack() as stack: + if self.random_seed == -1: + raise ValueError('The random seed has never been initialized. 
' + 'Set it to None if you do not want to impose replicability.') if self.random_seed is not None: stack.enter_context(qp.util.temp_seed(self.random_seed)) for params in self.samples_parameters(): yield self.sample(params) -class APP(AbstractStochasticSeededProtocol): +class OnLabelledCollectionProtocol: + def get_labelled_collection(self): + return self.data + + def on_preclassified_instances(self, pre_classifications, in_place=False): + assert len(pre_classifications) == len(self.data), \ + f'error: the pre-classified data has different shape ' \ + f'(expected {len(self.data)}, found {len(pre_classifications)})' + if in_place: + self.data.instances = pre_classifications + return self + else: + new = deepcopy(self) + return new.on_preclassified_instances(pre_classifications, in_place=True) + + +class LoadSamplesFromDirectory(AbstractProtocol): + + def __init__(self, folder_path, loader_fn, classes=None, **loader_kwargs): + assert exists(folder_path), f'folder {folder_path} does not exist' + assert callable(loader_fn), f'the passed loader_fn does not seem to be callable' + self.folder_path = folder_path + self.loader_fn = loader_fn + self.classes = classes + self.loader_kwargs = loader_kwargs + + def __call__(self): + for file in sorted(glob(f'{self.folder_path}/*')): + yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs) + + +class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ Implementation of the artificial prevalence protocol (APP). The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., @@ -123,7 +163,7 @@ class APP(AbstractStochasticSeededProtocol): self.n_prevalences = n_prevalences self.repeats = repeats - def prevalence_grid(self, dimensions): + def prevalence_grid(self): """ Generates vectors of prevalence values from an exhaustive grid of prevalence values. The number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example, @@ -134,14 +174,14 @@ class APP(AbstractStochasticSeededProtocol): to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to 1). Note that this method is deterministic, i.e., there is no random sampling anywhere. 
- :param dimensions: the number of classes :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found in the grid multiplied by `repeat` """ + dimensions = self.data.n_classes s = np.linspace(0., 1., self.n_prevalences, endpoint=True) s = [s] * (dimensions - 1) - prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1] + prevs = [p for p in itertools.product(*s, repeat=1) if (sum(p) <= 1.0)] prevs = np.asarray(prevs).reshape(len(prevs), -1) if self.repeats > 1: prevs = np.repeat(prevs, self.repeats, axis=0) @@ -149,8 +189,8 @@ class APP(AbstractStochasticSeededProtocol): def samples_parameters(self): indexes = [] - for prevs in self.prevalence_grid(dimensions=self.data.n_classes): - index = data.sampling_index(self.sample_size, *prevs) + for prevs in self.prevalence_grid(): + index = self.data.sampling_index(self.sample_size, *prevs) indexes.append(index) return indexes @@ -161,7 +201,7 @@ class APP(AbstractStochasticSeededProtocol): return F.num_prevalence_combinations(self.n_prevalences, self.data.n_classes, self.repeats) -class NPP(AbstractStochasticSeededProtocol): +class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural prevalence of the collection. @@ -182,7 +222,7 @@ class NPP(AbstractStochasticSeededProtocol): def samples_parameters(self): indexes = [] for _ in range(self.repeats): - index = data.uniform_sampling_index(self.sample_size) + index = self.data.uniform_sampling_index(self.sample_size) indexes.append(index) return indexes @@ -193,8 +233,7 @@ class NPP(AbstractStochasticSeededProtocol): return self.repeats - -class USimplexPP(AbstractStochasticSeededProtocol): +class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values, relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with @@ -218,8 +257,8 @@ class USimplexPP(AbstractStochasticSeededProtocol): def samples_parameters(self): indexes = [] - for prevs in F.uniform_simplex_sampling(n_classes=data.n_classes, size=self.repeats): - index = data.sampling_index(self.sample_size, *prevs) + for prevs in F.uniform_simplex_sampling(n_classes=self.data.n_classes, size=self.repeats): + index = self.data.sampling_index(self.sample_size, *prevs) indexes.append(index) return indexes @@ -230,7 +269,6 @@ class USimplexPP(AbstractStochasticSeededProtocol): return self.repeats - class CovariateShiftPP(AbstractStochasticSeededProtocol): """ Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. 
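A small usage sketch of this protocol (adapted from the __main__ demo removed by the hunk below; the toy domains are illustrative only):

    import numpy as np
    from quapy.data import LabelledCollection
    from quapy.protocol import CovariateShiftPP

    # two toy domains, A and B, with identical class prevalences
    y = [0]*25 + [1]*25 + [2]*25 + [3]*25
    dataA = LabelledCollection(['A:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)], y, classes_=sorted(np.unique(y)))
    dataB = LabelledCollection(['B:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)], y, classes_=sorted(np.unique(y)))

    # yields samples mixing A and B at rates 0.0, 0.1, ..., 1.0 (11 mixture points),
    # while preserving the class prevalence of the original collections
    p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_seed=1)
    for sample in p():
        print(sample.instances, sample.labels, sample.prevalence())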
@@ -300,33 +338,3 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): return self.repeats * len(self.mixture_points) - - -if __name__=='__main__': - import numpy as np - import quapy as qp - - # domainA - y = [0]*25 + [1]*25 + [2]*25 + [3]*25 - X = ['A:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)] - data = LabelledCollection(X, y, classes_=sorted(np.unique(y))) - - # domain B - y = [0]*25 + [1]*25 + [2]*25 + [3]*25 - X = ['B:'+str(i)+'-'+str(yi) for i, yi in enumerate(y)] - dataB = LabelledCollection(X, y, classes_=sorted(np.unique(y))) - - # p = APP(data, sample_size=10, n_prevalences=11, random_seed=42) - # p = NPP(data, sample_size=10, repeats=10, random_seed=42) - # p = NPP(data, sample_size=10, repeats=10) - # p = USimplexPP(data, sample_size=10, repeats=10) - p = CovariateShiftPP(data, dataB, sample_size=10, mixture_points=11, random_seed=1) - - for _ in range(2): - print('init generator', p.__class__.__name__) - for i in tqdm(p(), total=p.total()): - # print(i) - print(i.instances, i.labels, i.prevalence()) - - print('done') - diff --git a/quapy/tests/test_evaluation.py b/quapy/tests/test_evaluation.py new file mode 100644 index 0000000..de6603b --- /dev/null +++ b/quapy/tests/test_evaluation.py @@ -0,0 +1,57 @@ +import unittest +import quapy as qp +from sklearn.linear_model import LogisticRegression +from time import time +from method.aggregative import EMQ +from method.base import BaseQuantifier + + +class EvalTestCase(unittest.TestCase): + def test_eval_speedup(self): + + data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) + train, test = data.training, data.test + + protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=21, repeats=1, random_seed=1) + + class SlowLR(LogisticRegression): + def predict_proba(self, X): + import time + time.sleep(1) + return super().predict_proba(X) + + emq = EMQ(SlowLR()).fit(train) + + tinit = time() + score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True) + tend_optim = time()-tinit + print(f'evaluation (with optimization) took {tend_optim}s [MAE={score:.4f}]') + + class NonAggregativeEMQ(BaseQuantifier): + + def __init__(self, cls): + self.emq = EMQ(cls) + + def quantify(self, instances): + return self.emq.quantify(instances) + + def fit(self, data): + self.emq.fit(data) + return self + + def set_params(self, **parameters): pass + def get_params(self, deep=True): pass + + + emq = NonAggregativeEMQ(SlowLR()).fit(train) + + tinit = time() + score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True) + tend_no_optim = time() - tinit + print(f'evaluation (w/o optimization) took {tend_no_optim}s [MAE={score:.4f}]') + + self.assertEqual(tend_no_optim>tend_optim, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/quapy/tests/test_hierarchy.py b/quapy/tests/test_hierarchy.py new file mode 100644 index 0000000..21af4b6 --- /dev/null +++ b/quapy/tests/test_hierarchy.py @@ -0,0 +1,32 @@ +import unittest + +from sklearn.linear_model import LogisticRegression + +import quapy as qp +from quapy.method.aggregative import * + + + +class HierarchyTestCase(unittest.TestCase): + + def test_aggregative(self): + lr = LogisticRegression() + for m in [CC(lr), PCC(lr), ACC(lr), PACC(lr)]: + self.assertEqual(isinstance(m, AggregativeQuantifier), True) + + def test_binary(self): + lr = LogisticRegression() + for m in [HDy(lr)]: + self.assertEqual(isinstance(m, BinaryQuantifier), True) + + def test_probabilistic(self): + lr = LogisticRegression() + for m in [CC(lr), 
ACC(lr)]: + self.assertEqual(isinstance(m, AggregativeProbabilisticQuantifier), False) + for m in [PCC(lr), PACC(lr)]: + self.assertEqual(isinstance(m, AggregativeProbabilisticQuantifier), True) + + + +if __name__ == '__main__': + unittest.main() diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py new file mode 100644 index 0000000..637f831 --- /dev/null +++ b/quapy/tests/test_modsel.py @@ -0,0 +1,77 @@ +import unittest + +import numpy as np +from sklearn.linear_model import LogisticRegression +from sklearn.svm import SVC + +import quapy as qp +from method.aggregative import PACC +from model_selection import GridSearchQ +from protocol import APP + + +class ModselTestCase(unittest.TestCase): + + def test_modsel(self): + + q = PACC(LogisticRegression(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + # test = data.test + + param_grid = {'C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_seed=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True + ).fit(training) + print('best params', q.best_params_) + print('best score', q.best_score_) + + self.assertEqual(q.best_params_['C'], 10.0) + self.assertEqual(q.best_model().get_params()['C'], 10.0) + + def test_modsel_parallel(self): + + q = PACC(LogisticRegression(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + # test = data.test + + param_grid = {'C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_seed=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True + ).fit(training) + print('best params', q.best_params_) + print('best score', q.best_score_) + + self.assertEqual(q.best_params_['C'], 10.0) + self.assertEqual(q.best_model().get_params()['C'], 10.0) + + def test_modsel_timeout(self): + + class SlowLR(LogisticRegression): + def fit(self, X, y, sample_weight=None): + import time + time.sleep(10) + super(SlowLR, self).fit(X, y, sample_weight) + + q = PACC(SlowLR()) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + # test = data.test + + param_grid = {'C': np.logspace(-3,3,7)} + app = APP(validation, sample_size=100, random_seed=1) + q = GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=True, timeout=3, n_jobs=-1, verbose=True + ) + with self.assertRaises(TimeoutError): + q.fit(training) + + +if __name__ == '__main__': + unittest.main() diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py new file mode 100644 index 0000000..bf92ce5 --- /dev/null +++ b/quapy/tests/test_protocols.py @@ -0,0 +1,139 @@ +import unittest +import numpy as np +from data import LabelledCollection +from protocol import APP, NPP, USimplexPP, CovariateShiftPP, AbstractStochasticSeededProtocol + + +def mock_labelled_collection(prefix=''): + y = [0] * 250 + [1] * 250 + [2] * 250 + [3] * 250 + X = [prefix + str(i) + '-' + str(yi) for i, yi in enumerate(y)] + return LabelledCollection(X, y, classes_=sorted(np.unique(y))) + + +def samples_to_str(protocol): + samples_str = "" + for sample in protocol(): + samples_str += f'{sample.instances}\t{sample.labels}\t{sample.prevalence()}\n' + return samples_str + + +class 
TestProtocols(unittest.TestCase): + + def test_app_replicate(self): + data = mock_labelled_collection() + p = APP(data, sample_size=5, n_prevalences=11, random_seed=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + def test_app_not_replicate(self): + data = mock_labelled_collection() + p = APP(data, sample_size=5, n_prevalences=11) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + def test_app_number(self): + data = mock_labelled_collection() + p = APP(data, sample_size=100, n_prevalences=10, repeats=1) + + # surprisingly enough, for some n_prevalences the test fails, notwithstanding + # everything is correct. The problem is that in function APP.prevalence_grid() + # there is sometimes one rounding error that gets cumulated and + # surpasses 1.0 (by a very small float value, 0.0000000000002 or sthe like) + # so these tuples are mistakenly removed... I have tried with np.close, and + # other workarounds, but eventually happens that there is some negative probability + # in the sampling function... + + count = 0 + for _ in p(): + count+=1 + + self.assertEqual(count, p.total()) + + def test_npp_replicate(self): + data = mock_labelled_collection() + p = NPP(data, sample_size=5, repeats=5, random_seed=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + def test_npp_not_replicate(self): + data = mock_labelled_collection() + p = NPP(data, sample_size=5, repeats=5) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + def test_kraemer_replicate(self): + data = mock_labelled_collection() + p = USimplexPP(data, sample_size=5, repeats=10, random_seed=42) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + def test_kraemer_not_replicate(self): + data = mock_labelled_collection() + p = USimplexPP(data, sample_size=5, repeats=10) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + def test_covariate_shift_replicate(self): + dataA = mock_labelled_collection('domA') + dataB = mock_labelled_collection('domB') + p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_seed=1) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + + def test_covariate_shift_not_replicate(self): + dataA = mock_labelled_collection('domA') + dataB = mock_labelled_collection('domB') + p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + def test_no_seed_init(self): + class NoSeedInit(AbstractStochasticSeededProtocol): + def __init__(self): + self.data = mock_labelled_collection() + + def samples_parameters(self): + # return a matrix containing sampling indexes in the rows + return np.random.randint(0, len(self.data), 10*10).reshape(10, 10) + + def sample(self, params): + index = np.unique(params) + return self.data.sampling_from_index(index) + + p = NoSeedInit() + + # this should raise a ValueError, since the class is said to be AbstractStochasticSeededProtocol but the + # random_seed has never been passed to super(NoSeedInit, self).__init__(random_seed) + with self.assertRaises(ValueError): + for sample in p(): + pass + print('done') + + + +if __name__ == '__main__': + 
unittest.main() From eba6fd8123a0ba4354df4cc508f724667d473a8a Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Thu, 26 May 2022 17:59:23 +0200 Subject: [PATCH 06/59] optimization conditional in the prediction function --- quapy/CHANGE_LOG.txt | 23 +++++++-- quapy/evaluation.py | 74 ++++++++++++++--------------- quapy/functional.py | 1 - quapy/method/aggregative.py | 40 +++------------- quapy/method/base.py | 93 ++++++++++++++++++------------------- quapy/protocol.py | 30 +++++------- 6 files changed, 119 insertions(+), 142 deletions(-) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index a372109..fe39fc3 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -1,9 +1,9 @@ # main changes in 0.1.7 -- Protocols is now an abstraction, AbstractProtocol. There is a new class extending AbstractProtocol called +- Protocols are now abstracted as AbstractProtocol. There is a new class extending AbstractProtocol called AbstractStochasticSeededProtocol, which implements a seeding policy to allow replicate the series of samplings. There are some examples of protocols, APP, NPP, USimplexPP, CovariateShiftPP (experimental). - The idea is to start the sampling by simpli calling the __call__ method. + The idea is to start the sampling by simply calling the __call__ method. This change has a great impact in the framework, since many functions in qp.evaluation, qp.model_selection, and sampling functions in LabelledCollection make use of the old functions. @@ -11,7 +11,6 @@ Things to fix: -- eval budget policy? - clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance() - clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only internally and not imposed in any abstract class) @@ -31,4 +30,20 @@ Things to fix: return instead crisp decisions. The idea was to unify the quantification function (i.e., now it is always classify & aggregate, irrespective of the class). However, this has caused a problem with OneVsAll. This has to be checked, since it is now innecessarily complicated (it also has old references to .probabilistic, and all this - stuff). \ No newline at end of file + stuff). +- Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll + +# 0.1.7 +# change the LabelledCollection API (removing protocol-related samplings) +# need to change the two references to the above in the wiki / doc, and code examples... +# removed artificial_prevalence_sampling from functional + +# also: some parameters in the init could be used to indicate that the method should return a tuple with +# unlabelled instances and the vector of prevalence values (and not a LabelledCollection). +# Or: this can be done in a different function; i.e., we use one function (now __call__) to return +# LabelledCollections, and another new one for returning the other output, which is more general for +# evaluation purposes. + +# the so-called "gen" function has to be implemented as a protocol. The problem here is that this function +# should be able to return only unlabelled instances plus a vector of prevalences (and not LabelledCollections). 
+# This was coded as different functions in 0.1.6 diff --git a/quapy/evaluation.py b/quapy/evaluation.py index 0ea417d..d32cfb7 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -11,16 +11,35 @@ import quapy.functional as F import pandas as pd -def prediction(model: BaseQuantifier, protocol: AbstractProtocol, verbose=False): +def prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup='auto', verbose=False): + assert aggr_speedup in [False, True, 'auto', 'force'], 'invalid value for aggr_speedup' + sout = lambda x: print(x) if verbose else None - from method.aggregative import AggregativeQuantifier - if isinstance(model, AggregativeQuantifier) and isinstance(protocol, OnLabelledCollectionProtocol): - sout('speeding up the prediction for the aggregative quantifier') + + apply_optimization = False + + if aggr_speedup in [True, 'auto', 'force']: + # checks whether the prediction can be made more efficiently; this check consists in verifying if the model is + # of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to + # classify using the protocol would exceed the number of test documents in the original collection + from method.aggregative import AggregativeQuantifier + if isinstance(model, AggregativeQuantifier) and isinstance(protocol, OnLabelledCollectionProtocol): + if aggr_speedup == 'force': + apply_optimization = True + sout(f'forcing aggregative speedup') + elif hasattr(protocol, 'sample_size'): + nD = len(protocol.get_labelled_collection()) + samplesD = protocol.total() * protocol.sample_size + if nD < samplesD: + apply_optimization = True + sout(f'speeding up the prediction for the aggregative quantifier, ' + f'total classifications {nD} instead of {samplesD}') + + if apply_optimization: pre_classified = model.classify(protocol.get_labelled_collection().instances) - return __prediction_helper(model.aggregate, protocol.on_preclassified_instances(pre_classified), verbose) + protocol_with_predictions = protocol.on_preclassified_instances(pre_classified) + return __prediction_helper(model.aggregate, protocol_with_predictions, verbose) else: - sout(f'the method is not aggregative, or the protocol is not an instance of ' - f'{OnLabelledCollectionProtocol.__name__}, so no optimization can be carried out') return __prediction_helper(model.quantify, protocol, verbose) @@ -38,10 +57,11 @@ def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=F def evaluation_report(model: BaseQuantifier, protocol: AbstractProtocol, - error_metrics:Iterable[Union[str,Callable]]='mae', + error_metrics: Iterable[Union[str,Callable]] = 'mae', + aggr_speedup='auto', verbose=False): - true_prevs, estim_prevs = prediction(model, protocol, verbose) + true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose) return _prevalence_report(true_prevs, estim_prevs, error_metrics) @@ -65,38 +85,18 @@ def _prevalence_report(true_prevs, estim_prevs, error_metrics: Iterable[Union[st return df -def evaluate(model: BaseQuantifier, protocol: AbstractProtocol, error_metric:Union[str, Callable], verbose=False): +def evaluate( + model: BaseQuantifier, + protocol: AbstractProtocol, + error_metric:Union[str, Callable], + aggr_speedup='auto', + verbose=False): + if isinstance(error_metric, str): error_metric = qp.error.from_name(error_metric) - true_prevs, estim_prevs = prediction(model, protocol, verbose) + true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, 
verbose=verbose)
     return error_metric(true_prevs, estim_prevs)


-def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, repeats=1, verbose=False):
-    if n_prevpoints is None and eval_budget is None:
-        raise ValueError('either n_prevpoints or eval_budget has to be specified')
-    elif n_prevpoints is None:
-        assert eval_budget > 0, 'eval_budget must be a positive integer'
-        n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if verbose:
-            print(f'setting n_prevpoints={n_prevpoints} so that the number of '
-                  f'evaluations ({eval_computations}) does not exceed the evaluation '
-                  f'budget ({eval_budget})')
-    elif eval_budget is None:
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if verbose:
-            print(f'{eval_computations} evaluations will be performed for each '
-                  f'combination of hyper-parameters')
-    else:
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if eval_computations > eval_budget:
-            n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
-            new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-            if verbose:
-                print(f'the budget of evaluations would be exceeded with '
-                      f'n_prevpoints={n_prevpoints}. Chaning to n_prevpoints={n_prevpoints}. This will produce '
-                      f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
-    return n_prevpoints, eval_computations
diff --git a/quapy/functional.py b/quapy/functional.py
index 215d89f..e44dacf 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -4,7 +4,6 @@ import scipy
 import numpy as np


-
 def prevalence_linspace(n_prevalences=21, repeats=1, smooth_limits_epsilon=0.01):
     """
     Produces an array of uniformly separated values of prevalence.
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index ea9cbc0..ca4b25c 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1023,15 +1023,18 @@ class OneVsAll(AggregativeQuantifier):
     """

     def __init__(self, binary_quantifier, n_jobs=-1):
+        assert isinstance(binary_quantifier, BaseQuantifier), \
+            f'{binary_quantifier} does not seem to be a Quantifier'
+        assert isinstance(binary_quantifier, AggregativeQuantifier), \
+            f'{binary_quantifier} does not seem to be of type Aggregative'
         self.binary_quantifier = binary_quantifier
         self.n_jobs = n_jobs

     def fit(self, data: LabelledCollection, fit_learner=True):
         assert not data.binary, \
             f'{self.__class__.__name__} expects non-binary data'
-        assert isinstance(self.binary_quantifier, BaseQuantifier), \
-            f'{self.binary_quantifier} does not seem to be a Quantifier'
-        assert fit_learner == True, 'fit_learner must be True'
+        assert fit_learner == True, \
+            'fit_learner must be True'
         self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
         self.__parallel(self._delayed_binary_fit, data)

@@ -1057,42 +1060,11 @@
             return np.swapaxes(classif_predictions, 0, 1)
         else:
             return classif_predictions.T
-    #
-    # def posterior_probabilities(self, instances):
-    #     """
-    #     Returns a matrix of shape `(n,m,2)` with `n` the number of instances and `m` the number of classes. The entry
-    #     `(i,j,1)` (resp. `(i,j,0)`) is a value in [0,1] indicating the posterior probability that instance `i` belongs
-    #     (resp. does not belong) to class `j`.
-    #     The posterior probabilities are independent of each other, meaning that, in general, they do not sum
-    #     up to one.
-    #
-    #     :param instances: array-like
-    #     :return: `np.ndarray`
-    #     """
-    #
-    #     if not isinstance(self.binary_quantifier, AggregativeProbabilisticQuantifier):
-    #         raise NotImplementedError(f'{self.__class__.__name__} does not implement posterior_probabilities because '
-    #                                   f'the base quantifier {self.binary_quantifier.__class__.__name__} is not '
-    #                                   f'probabilistic')
-    #     posterior_predictions_bin = self.__parallel(self._delayed_binary_posteriors, instances)
-    #     return np.swapaxes(posterior_predictions_bin, 0, 1)

     def aggregate(self, classif_predictions):
-        # if self.probabilistic:
-        #     assert classif_predictions.shape[1] == self.n_classes and classif_predictions.shape[2] == 2, \
-        #         'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \
-        #         'probabilities (2 dimensions) for each document (row) and class (columns)'
-        # else:
-        #     assert set(np.unique(classif_predictions)).issubset({0, 1}), \
-        #         'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
-        #         'predictions for each document (row) and class (columns)'
         prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions)
         return F.normalize_prevalence(prevalences)

-    # def quantify(self, X):
-    #     predictions = self.classify(X)
-    #     return self.aggregate(predictions)
-
     def __parallel(self, func, *args, **kwargs):
         return np.asarray(
             # some quantifiers (in particular, ELM-based ones) cannot be run with multiprocess, since the temp dir they
diff --git a/quapy/method/base.py b/quapy/method/base.py
index 55e18c7..6c2a0c5 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -1,4 +1,9 @@
 from abc import ABCMeta, abstractmethod
+from copy import deepcopy
+
+import numpy as np
+from joblib import Parallel, delayed
+import quapy.functional as F

 from quapy.data import LabelledCollection

@@ -62,52 +67,50 @@ class BinaryQuantifier(BaseQuantifier):
         assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \
                             f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'

-
-
-
-
-# class OneVsAll:
-#     """
-#     Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
-#     quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1.
-# """ -# -# def __init__(self, binary_method, n_jobs=-1): -# self.binary_method = binary_method -# self.n_jobs = n_jobs -# -# def fit(self, data: LabelledCollection, **kwargs): -# assert not data.binary, f'{self.__class__.__name__} expect non-binary data' -# assert isinstance(self.binary_method, BaseQuantifier), f'{self.binary_method} does not seem to be a Quantifier' -# self.class_method = {c: deepcopy(self.binary_method) for c in data.classes_} -# Parallel(n_jobs=self.n_jobs, backend='threading')( -# delayed(self._delayed_binary_fit)(c, self.class_method, data, **kwargs) for c in data.classes_ -# ) -# return self -# -# def quantify(self, X, *args): -# prevalences = np.asarray( -# Parallel(n_jobs=self.n_jobs, backend='threading')( -# delayed(self._delayed_binary_predict)(c, self.class_method, X) for c in self.classes -# ) -# ) -# return F.normalize_prevalence(prevalences) -# -# @property -# def classes(self): -# return sorted(self.class_method.keys()) -# -# def set_params(self, **parameters): -# self.binary_method.set_params(**parameters) -# -# def get_params(self, deep=True): -# return self.binary_method.get_params() -# -# def _delayed_binary_predict(self, c, learners, X): -# return learners[c].quantify(X)[:,1] # the mean is the estimation for the positive class prevalence -# -# def _delayed_binary_fit(self, c, learners, data, **kwargs): -# bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2) -# learners[c].fit(bindata, **kwargs) +class OneVsAllGeneric: + """ + Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary + quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1. + """ + + def __init__(self, binary_quantifier, n_jobs=1): + assert isinstance(binary_quantifier, BaseQuantifier), \ + f'{binary_quantifier} does not seem to be a Quantifier' + self.binary_quantifier = binary_quantifier + self.n_jobs = n_jobs + + def fit(self, data: LabelledCollection, **kwargs): + assert not data.binary, \ + f'{self.__class__.__name__} expect non-binary data' + self.class_quatifier = {c: deepcopy(self.binary_quantifier) for c in data.classes_} + Parallel(n_jobs=self.n_jobs, backend='threading')( + delayed(self._delayed_binary_fit)(c, self.class_quatifier, data, **kwargs) for c in data.classes_ + ) + return self + + def quantify(self, X, *args): + prevalences = np.asarray( + Parallel(n_jobs=self.n_jobs, backend='threading')( + delayed(self._delayed_binary_predict)(c, self.class_quatifier, X) for c in self.classes + ) + ) + return F.normalize_prevalence(prevalences) + + @property + def classes(self): + return sorted(self.class_quatifier.keys()) + + def set_params(self, **parameters): + self.binary_quantifier.set_params(**parameters) + + def get_params(self, deep=True): + return self.binary_quantifier.get_params() + + def _delayed_binary_predict(self, c, learners, X): + return learners[c].quantify(X)[:,1] # the mean is the estimation for the positive class prevalence + + def _delayed_binary_fit(self, c, learners, data, **kwargs): + bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2) + learners[c].fit(bindata, **kwargs) diff --git a/quapy/protocol.py b/quapy/protocol.py index 70a98d9..d74e797 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -13,24 +13,6 @@ from os.path import exists from glob import glob -# 0.1.7 -# change the LabelledCollection API (removing protocol-related samplings) -# need to change the two references to the above in the 
wiki / doc, and code examples...
-# removed artificial_prevalence_sampling from functional
-
-# maybe add some parameters in the init of the protocols (or maybe only for IndexableWhateverProtocols
-# indicating that the protocol should return indexes, and not samples themselves?
-# also: some parameters in the init could be used to indicate that the method should return a tuple with
-# unlabelled instances and the vector of prevalence values (and not a LabelledCollection).
-# Or: this can be done in a different function; i.e., we use one function (now __call__) to return
-# LabelledCollections, and another new one for returning the other output, which is more general for
-# evaluation purposes.
-
-# the so-called "gen" function has to be implemented as a protocol. The problem here is that this function
-# should be able to return only unlabelled instances plus a vector of prevalences (and not LabelledCollections).
-# This was coded as different functions in 0.1.6
-
-
 class AbstractProtocol(metaclass=ABCMeta):

     @abstractmethod
@@ -133,11 +115,21 @@ class LoadSamplesFromDirectory(AbstractProtocol):
         self.loader_fn = loader_fn
         self.classes = classes
         self.loader_kwargs = loader_kwargs
+        self._list_files = None

     def __call__(self):
-        for file in sorted(glob(self.folder_path, '*')):
+        for file in self.list_files:
             yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs)

+    @property
+    def list_files(self):
+        if self._list_files is None:
+            self._list_files = sorted(glob(self.folder_path, '*'))
+        return self._list_files
+
+    def total(self):
+        return len(self.list_files)
+

 class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     """

From 45642ad7789d37b96262102da3b3f3a9dbfb9d97 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Wed, 1 Jun 2022 18:28:59 +0200
Subject: [PATCH 07/59] lequa as dataset

---
 quapy/CHANGE_LOG.txt           | 16 ++++++++-
 quapy/data/datasets.py         | 53 ++++++++++++++++++++++++++++-
 quapy/evaluation.py            | 12 +++----
 quapy/method/meta.py           |  2 +-
 quapy/model_selection.py       |  7 +++-
 quapy/protocol.py              | 62 ++++++++++++++++++----------------
 quapy/tests/test_datasets.py   | 13 ++++++-
 quapy/tests/test_evaluation.py | 10 +++---
 quapy/tests/test_modsel.py     | 33 +++++++++++++++++-
 quapy/tests/test_protocols.py  |  5 ++-
 quapy/util.py                  |  1 +
 11 files changed, 163 insertions(+), 51 deletions(-)

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index fe39fc3..ab03b01 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -9,9 +9,19 @@

 - ACC, PACC, Forman's threshold variants have been parallelized.

+- Exploration of hyperparameters in Model selection can now be run in parallel (there was an n_jobs argument in
+  QuaPy 0.1.6, but only the evaluation part for one specific hyperparameter was run in parallel).
+
+- The prediction function has been refactored, so it applies the optimization for aggregative quantifiers (which
+  consists of pre-classifying all instances, and then only invoking aggregate on the samples) only in cases in
+  which the total number of classifications would be smaller than the number of classifications with the standard
+  procedure. The user can now specify "force", "auto", True or False, in order to actively decide whether to apply it
+  or not.
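To make the refactored prediction policy concrete, a minimal evaluation sketch follows (the reviews dataset and the PACC quantifier are placeholders chosen for illustration; any aggregative quantifier over a collection-based protocol would do):

    import quapy as qp
    from quapy.protocol import APP
    from quapy.method.aggregative import PACC
    from sklearn.linear_model import LogisticRegression

    data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10)
    model = PACC(LogisticRegression()).fit(data.training)

    # APP draws samples from the test collection at artificially controlled prevalences
    protocol = APP(data.test, sample_size=500, n_prevalences=21, repeats=1, random_seed=0)

    # 'auto' (or True) pre-classifies the whole collection once and then only aggregates per
    # sample, provided this implies fewer classifications than quantifying each sample in turn;
    # 'force' skips that check and always applies the speed-up; False disables it altogether
    mae = qp.evaluation.evaluate(model, protocol, error_metric='mae', aggr_speedup='auto')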
 Things to fix:
-- clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance()
+- clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance():
+    this is not working; I don't know how to make the isinstance work. Looks like there is some problem with the
+    path of the imported class wrt the path of the class that arrives from another module...
 - clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only
   internally and not imposed in any abstract class)
 - optimize "qp.evaluation.prediction" for aggregative methods (pre-classification)
@@ -33,6 +43,10 @@ Things to fix:
     stuff).
 - Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll

+New features:
+- Add LeQua2022 to datasets (everything automatic, and with proper protocols "gen")
+- Add an "experimental room", with scripts to quickly test new ideas and see results.
+
 # 0.1.7
 # change the LabelledCollection API (removing protocol-related samplings)
 # need to change the two references to the above in the wiki / doc, and code examples...
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 74e2a3e..06ba3d0 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -43,6 +43,8 @@ UCI_DATASETS = ['acute.a', 'acute.b',
                 'wine-q-red', 'wine-q-white',
                 'yeast']

+LEQUA2022_TASKS = ['T1A', 'T1B', 'T2A', 'T2B']
+

 def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) -> Dataset:
     """
@@ -532,4 +534,53 @@ def fetch_UCILabelledCollection(dataset_name, data_home=None, verbose=False) ->


 def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float):
-    df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
\ No newline at end of file
+    df[col] = df[col].apply(lambda x:repl[x]).astype(astype, copy=False)
+
+
+def fetch_lequa2022(task, data_home=None):
+    """
+    Loads the official datasets of the LeQua 2022 quantification competition, downloading them if needed.
+    Returns the training collection along with two sample-generation protocols ("gen" functions): one over
+    the validation (dev) samples and one over the test samples.
+
+    :param task: a string identifying the task; valid ones are listed in LEQUA2022_TASKS
+    :param data_home: the path where the data will be stored (None means the default quapy home directory)
+    :return: a tuple `(train, val_gen, test_gen)`
+    """
+    from quapy.data._lequa2022 import load_raw_documents, load_vector_documents, SamplesFromDir
+
+    assert task in LEQUA2022_TASKS, \
+        f'Unknown task {task}.
Valid ones are {LEQUA2022_TASKS}' + if data_home is None: + data_home = get_quapy_home() + + URL_TRAINDEV=f'https://zenodo.org/record/6546188/files/{task}.train_dev.zip' + URL_TEST=f'https://zenodo.org/record/6546188/files/{task}.test.zip' + URL_TEST_PREV=f'https://zenodo.org/record/6546188/files/{task}.test_prevalences.zip' + + lequa_dir = join(data_home, 'lequa2022') + os.makedirs(lequa_dir, exist_ok=True) + + def download_unzip_and_remove(unzipped_path, url): + tmp_path = join(lequa_dir, task + '_tmp.zip') + download_file_if_not_exists(url, tmp_path) + with zipfile.ZipFile(tmp_path) as file: + file.extractall(unzipped_path) + os.remove(tmp_path) + + if not os.path.exists(join(lequa_dir, task)): + download_unzip_and_remove(lequa_dir, URL_TRAINDEV) + download_unzip_and_remove(lequa_dir, URL_TEST) + download_unzip_and_remove(lequa_dir, URL_TEST_PREV) + + if task in ['T1A', 'T1B']: + load_fn = load_vector_documents + elif task in ['T2A', 'T2B']: + load_fn = load_raw_documents + + tr_path = join(lequa_dir, task, 'public', 'training_data.txt') + train = LabelledCollection.load(tr_path, loader_func=load_fn) + + val_samples_path = join(lequa_dir, task, 'public', 'dev_samples') + val_true_prev_path = join(lequa_dir, task, 'public', 'dev_prevalences.txt') + val_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn) + + test_samples_path = join(lequa_dir, task, 'public', 'dev_samples') + test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt') + test_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn) + + return train, val_gen, test_gen + diff --git a/quapy/evaluation.py b/quapy/evaluation.py index d32cfb7..57c2ed1 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -1,13 +1,9 @@ from typing import Union, Callable, Iterable import numpy as np from tqdm import tqdm -import inspect import quapy as qp from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol -from quapy.data import LabelledCollection from quapy.method.base import BaseQuantifier -from quapy.util import temp_seed -import quapy.functional as F import pandas as pd @@ -22,7 +18,7 @@ def prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup=' # checks whether the prediction can be made more efficiently; this check consists in verifying if the model is # of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to # classify using the protocol would exceed the number of test documents in the original collection - from method.aggregative import AggregativeQuantifier + from quapy.method.aggregative import AggregativeQuantifier if isinstance(model, AggregativeQuantifier) and isinstance(protocol, OnLabelledCollectionProtocol): if aggr_speedup == 'force': apply_optimization = True @@ -45,9 +41,9 @@ def prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup=' def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=False): true_prevs, estim_prevs = [], [] - for sample in tqdm(protocol(), total=protocol.total()) if verbose else protocol(): - estim_prevs.append(quantification_fn(sample.instances)) - true_prevs.append(sample.prevalence()) + for sample_instances, sample_prev in tqdm(protocol(), total=protocol.total()) if verbose else protocol(): + estim_prevs.append(quantification_fn(sample_instances)) + true_prevs.append(sample_prev) true_prevs = np.asarray(true_prevs) estim_prevs = np.asarray(estim_prevs) diff --git a/quapy/method/meta.py 
b/quapy/method/meta.py index 3e57652..d5e8c2a 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -9,7 +9,6 @@ from tqdm import tqdm import quapy as qp from quapy import functional as F from quapy.data import LabelledCollection -from quapy.evaluation import evaluate from quapy.model_selection import GridSearchQ try: @@ -176,6 +175,7 @@ class Ensemble(BaseQuantifier): For each model in the ensemble, the performance is measured in terms of _error_name_ on the quantification of the samples used for training the rest of the models in the ensemble. """ + from quapy.evaluation import evaluate error = qp.error.from_name(error_name) tests = [m[3] for m in self.ensemble] scores = [] diff --git a/quapy/model_selection.py b/quapy/model_selection.py index c1fa817..7d71023 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -81,6 +81,8 @@ class GridSearchQ(BaseQuantifier): self.param_scores_ = {} self.best_score_ = None + tinit = time() + hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)] scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), n_jobs=n_jobs) @@ -94,10 +96,13 @@ class GridSearchQ(BaseQuantifier): else: self.param_scores_[str(params)] = 'timeout' + tend = time()-tinit + if self.best_score_ is None: raise TimeoutError('all jobs took more than the timeout time to end') - self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f})') + self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) ' + f'[took {tend:.4f}s]') if self.refit: if isinstance(protocol, OnLabelledCollectionProtocol): diff --git a/quapy/protocol.py b/quapy/protocol.py index d74e797..f539830 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -1,14 +1,11 @@ from copy import deepcopy - import quapy as qp import numpy as np import itertools -from collections.abc import Generator from contextlib import ExitStack from abc import ABCMeta, abstractmethod from quapy.data import LabelledCollection import quapy.functional as F -from tqdm import tqdm from os.path import exists from glob import glob @@ -87,10 +84,14 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): if self.random_seed is not None: stack.enter_context(qp.util.temp_seed(self.random_seed)) for params in self.samples_parameters(): - yield self.sample(params) + yield self.collator_fn(self.sample(params)) + + def set_collator(self, collator_fn): + self.collator_fn = collator_fn class OnLabelledCollectionProtocol: + def get_labelled_collection(self): return self.data @@ -106,31 +107,6 @@ class OnLabelledCollectionProtocol: return new.on_preclassified_instances(pre_classifications, in_place=True) -class LoadSamplesFromDirectory(AbstractProtocol): - - def __init__(self, folder_path, loader_fn, classes=None, **loader_kwargs): - assert exists(folder_path), f'folder {folder_path} does not exist' - assert callable(loader_fn), f'the passed load_fn does not seem to be callable' - self.folder_path = folder_path - self.loader_fn = loader_fn - self.classes = classes - self.loader_kwargs = loader_kwargs - self._list_files = None - - def __call__(self): - for file in self.list_files: - yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs) - - @property - def list_files(self): - if self._list_files is None: - self._list_files = sorted(glob(self.folder_path, '*')) - return self._list_files - - def total(self): - 
return len(self.list_files) - - class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ Implementation of the artificial prevalence protocol (APP). @@ -154,6 +130,7 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): self.sample_size = sample_size self.n_prevalences = n_prevalences self.repeats = repeats + self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence())) def prevalence_grid(self): """ @@ -210,6 +187,7 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): self.sample_size = sample_size self.repeats = repeats self.random_seed = random_seed + self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence())) def samples_parameters(self): indexes = [] @@ -246,6 +224,7 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) self.sample_size = sample_size self.repeats = repeats self.random_seed = random_seed + self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence())) def samples_parameters(self): indexes = [] @@ -261,6 +240,31 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) return self.repeats +# class LoadSamplesFromDirectory(AbstractProtocol): +# +# def __init__(self, folder_path, loader_fn, classes=None, **loader_kwargs): +# assert exists(folder_path), f'folder {folder_path} does not exist' +# assert callable(loader_fn), f'the passed load_fn does not seem to be callable' +# self.folder_path = folder_path +# self.loader_fn = loader_fn +# self.classes = classes +# self.loader_kwargs = loader_kwargs +# self._list_files = None +# +# def __call__(self): +# for file in self.list_files: +# yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs) +# +# @property +# def list_files(self): +# if self._list_files is None: +# self._list_files = sorted(glob(self.folder_path, '*')) +# return self._list_files +# +# def total(self): +# return len(self.list_files) + + class CovariateShiftPP(AbstractStochasticSeededProtocol): """ Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. 
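For reference, a minimal usage sketch of this protocol (mirroring the constructor exercised in test_protocols.py; dataA and dataB are placeholders standing for any two LabelledCollections sharing the same classes, and are not part of the patch):

    from quapy.protocol import CovariateShiftPP

    # draw samples that mix the two domains at 11 rates, from pure A to pure B,
    # while preserving each domain's original class prevalence within every sample
    p = CovariateShiftPP(dataA, dataB, sample_size=100, mixture_points=11, random_seed=0)

    for sample in p():
        # at this point in the series the samples are LabelledCollections;
        # later patches make the protocols yield (instances, prevalence) pairs instead
        print(sample.prevalence())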
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py index 88209e8..8d70fe9 100644 --- a/quapy/tests/test_datasets.py +++ b/quapy/tests/test_datasets.py @@ -1,7 +1,8 @@ import pytest from quapy.data.datasets import REVIEWS_SENTIMENT_DATASETS, TWITTER_SENTIMENT_DATASETS_TEST, \ - TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, fetch_reviews, fetch_twitter, fetch_UCIDataset + TWITTER_SENTIMENT_DATASETS_TRAIN, UCI_DATASETS, LEQUA2022_TASKS, \ + fetch_reviews, fetch_twitter, fetch_UCIDataset, fetch_lequa2022 @pytest.mark.parametrize('dataset_name', REVIEWS_SENTIMENT_DATASETS) @@ -41,3 +42,13 @@ def test_fetch_UCIDataset(dataset_name): print('Training set stats') dataset.training.stats() print('Test set stats') + + +@pytest.mark.parametrize('dataset_name', LEQUA2022_TASKS) +def test_fetch_lequa2022(dataset_name): + fetch_lequa2022(dataset_name) + # dataset = fetch_lequa2022(dataset_name) + # print(f'Dataset {dataset_name}') + # print('Training set stats') + # dataset.training.stats() + # print('Test set stats') \ No newline at end of file diff --git a/quapy/tests/test_evaluation.py b/quapy/tests/test_evaluation.py index de6603b..73dc485 100644 --- a/quapy/tests/test_evaluation.py +++ b/quapy/tests/test_evaluation.py @@ -2,8 +2,8 @@ import unittest import quapy as qp from sklearn.linear_model import LogisticRegression from time import time -from method.aggregative import EMQ -from method.base import BaseQuantifier +from quapy.method.aggregative import EMQ +from quapy.method.base import BaseQuantifier class EvalTestCase(unittest.TestCase): @@ -12,7 +12,7 @@ class EvalTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) train, test = data.training, data.test - protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=21, repeats=1, random_seed=1) + protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=11, repeats=1, random_seed=1) class SlowLR(LogisticRegression): def predict_proba(self, X): @@ -23,7 +23,7 @@ class EvalTestCase(unittest.TestCase): emq = EMQ(SlowLR()).fit(train) tinit = time() - score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True) + score = qp.evaluation.evaluate(emq, protocol, error_metric='mae', verbose=True, aggr_speedup='force') tend_optim = time()-tinit print(f'evaluation (with optimization) took {tend_optim}s [MAE={score:.4f}]') @@ -50,7 +50,7 @@ class EvalTestCase(unittest.TestCase): tend_no_optim = time() - tinit print(f'evaluation (w/o optimization) took {tend_no_optim}s [MAE={score:.4f}]') - self.assertEqual(tend_no_optim>tend_optim, True) + self.assertEqual(tend_no_optim>(tend_optim/2), True) if __name__ == '__main__': diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py index 637f831..9c6604a 100644 --- a/quapy/tests/test_modsel.py +++ b/quapy/tests/test_modsel.py @@ -8,6 +8,7 @@ import quapy as qp from method.aggregative import PACC from model_selection import GridSearchQ from protocol import APP +import time class ModselTestCase(unittest.TestCase): @@ -18,7 +19,6 @@ class ModselTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) training, validation = data.training.split_stratified(0.7, random_state=1) - # test = data.test param_grid = {'C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_seed=1) @@ -50,6 +50,37 @@ class ModselTestCase(unittest.TestCase): self.assertEqual(q.best_params_['C'], 10.0) self.assertEqual(q.best_model().get_params()['C'], 10.0) + def 
test_modsel_parallel_speedup(self): + class SlowLR(LogisticRegression): + def fit(self, X, y, sample_weight=None): + time.sleep(1) + return super(SlowLR, self).fit(X, y, sample_weight) + + q = PACC(SlowLR(random_state=1, max_iter=5000)) + + data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) + training, validation = data.training.split_stratified(0.7, random_state=1) + + param_grid = {'C': np.logspace(-3, 3, 7)} + app = APP(validation, sample_size=100, random_seed=1) + + tinit = time.time() + GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True + ).fit(training) + tend_nooptim = time.time()-tinit + + tinit = time.time() + GridSearchQ( + q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True + ).fit(training) + tend_optim = time.time() - tinit + + print(f'parallel training took {tend_optim:.4f}s') + print(f'sequential training took {tend_nooptim:.4f}s') + + self.assertEqual(tend_optim < (0.5*tend_nooptim), True) + def test_modsel_timeout(self): class SlowLR(LogisticRegression): diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index bf92ce5..b68567b 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -1,7 +1,7 @@ import unittest import numpy as np -from data import LabelledCollection -from protocol import APP, NPP, USimplexPP, CovariateShiftPP, AbstractStochasticSeededProtocol +from quapy.data import LabelledCollection +from quapy.protocol import APP, NPP, USimplexPP, CovariateShiftPP, AbstractStochasticSeededProtocol def mock_labelled_collection(prefix=''): @@ -134,6 +134,5 @@ class TestProtocols(unittest.TestCase): print('done') - if __name__ == '__main__': unittest.main() diff --git a/quapy/util.py b/quapy/util.py index 9d44633..952c2da 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -46,6 +46,7 @@ def parallel(func, args, n_jobs): that takes the `quapy.environ` variable as input silently """ + print('n_jobs',n_jobs) def func_dec(environ, *args): qp.environ = environ return func(*args) From bfe4b8b51a42ce29bba812697f22aed451a9ec58 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 3 Jun 2022 13:51:22 +0200 Subject: [PATCH 08/59] updating properties of labelled collection --- quapy/data/base.py | 38 ++++++++++++++++++++++++++++++++++++++ quapy/protocol.py | 17 ++++++++++++++--- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/quapy/data/base.py b/quapy/data/base.py index c555692..b22a71f 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -63,6 +63,7 @@ class LabelledCollection: """ return self.instances.shape[0] + @property def prevalence(self): """ Returns the prevalence, or relative frequency, of the classes of interest. @@ -248,6 +249,43 @@ class LabelledCollection: """ return self.instances, self.labels + @property + def Xp(self): + """ + Gets the instances and the true prevalence. 
This is useful when implementing evaluation protocols + + :return: a tuple `(instances, prevalence)` from this collection + """ + return self.instances, self.prevalence() + + @property + def X(self): + """ + An alias to self.instances + + :return: self.instances + """ + return self.instances + + @property + def y(self): + """ + An alias to self.labels + + :return: self.labels + """ + return self.labels + + @property + def p(self): + """ + An alias to self.prevalence() + + :return: self.prevalence() + """ + return self.prevalence() + + def stats(self, show=True): """ Returns (and eventually prints) a dictionary with some stats of this collection. E.g.,: diff --git a/quapy/protocol.py b/quapy/protocol.py index f539830..c55c3ef 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -84,14 +84,16 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): if self.random_seed is not None: stack.enter_context(qp.util.temp_seed(self.random_seed)) for params in self.samples_parameters(): - yield self.collator_fn(self.sample(params)) + yield self.collator(self.sample(params)) - def set_collator(self, collator_fn): - self.collator_fn = collator_fn + def collator(self, sample, *args): + return sample class OnLabelledCollectionProtocol: + RETURN_TYPES = ['sample_prev', 'labelled_collection'] + def get_labelled_collection(self): return self.data @@ -106,6 +108,15 @@ class OnLabelledCollectionProtocol: new = deepcopy(self) return new.on_preclassified_instances(pre_classifications, in_place=True) + @classmethod + def get_collator(cls, return_type='sample_prev'): + assert return_type in cls.RETURN_TYPES, \ + f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}' + if return_type=='sample_prev': + return lambda lc:lc.Xp + elif return_type=='labelled_collection': + return lambda lc:lc + class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ From 82a01478ec80eeb1fead5c320bb5e329a7ee9441 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 3 Jun 2022 18:02:52 +0200 Subject: [PATCH 09/59] collator functions in protocols for preparing the outputs --- quapy/data/base.py | 18 ++++++++++++++---- quapy/protocol.py | 14 +++++++------- quapy/tests/test_datasets.py | 10 ++++------ quapy/tests/test_protocols.py | 4 ++-- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/quapy/data/base.py b/quapy/data/base.py index b22a71f..4601c15 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -63,10 +63,9 @@ class LabelledCollection: """ return self.instances.shape[0] - @property def prevalence(self): """ - Returns the prevalence, or relative frequency, of the classes of interest. + Returns the prevalence, or relative frequency, of the classes in the codeframe. :return: a np.ndarray of shape `(n_classes)` with the relative frequencies of each class, in the same order as listed by `self.classes_` @@ -75,7 +74,7 @@ class LabelledCollection: def counts(self): """ - Returns the number of instances for each of the classes of interest. + Returns the number of instances for each of the classes in the codeframe. :return: a np.ndarray of shape `(n_classes)` with the number of instances of each class, in the same order as listed by `self.classes_` @@ -252,7 +251,8 @@ class LabelledCollection: @property def Xp(self): """ - Gets the instances and the true prevalence. This is useful when implementing evaluation protocols + Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from + a `LabelledCollection` object. 
:return: a tuple `(instances, prevalence)` from this collection
         """
         return self.instances, self.prevalence()

@@ -420,6 +420,15 @@ class Dataset:
         """
         return len(self.vocabulary)

+    @property
+    def train_test(self):
+        """
+        Alias to `self.training` and `self.test`
+
+        :return: the training and test collections
+        """
+        return self.training, self.test
+
     def stats(self, show):
         """
         Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:
diff --git a/quapy/protocol.py b/quapy/protocol.py
index c55c3ef..fec37ca 100644
--- a/quapy/protocol.py
+++ b/quapy/protocol.py
@@ -135,13 +135,13 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     :param random_seed: allows replicating samples across runs (default None)
     """

-    def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_seed=None):
+    def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_seed=None, return_type='sample_prev'):
         super(APP, self).__init__(random_seed)
         self.data = data
         self.sample_size = sample_size
         self.n_prevalences = n_prevalences
         self.repeats = repeats
-        self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence()))
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)

     def prevalence_grid(self):
         """
@@ -192,13 +192,13 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     :param random_seed: allows replicating samples across runs (default None)
     """

-    def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_seed=None):
+    def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_seed=None, return_type='sample_prev'):
         super(NPP, self).__init__(random_seed)
         self.data = data
         self.sample_size = sample_size
         self.repeats = repeats
         self.random_seed = random_seed
-        self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence()))
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)

     def samples_parameters(self):
         indexes = []
@@ -229,13 +229,13 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     :param random_seed: allows replicating samples across runs (default None)
     """

-    def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_seed=None):
+    def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_seed=None, return_type='sample_prev'):
         super(USimplexPP, self).__init__(random_seed)
         self.data = data
         self.sample_size = sample_size
         self.repeats = repeats
         self.random_seed = random_seed
-        self.set_collator(collator_fn=lambda x: (x.instances, x.prevalence()))
+        self.collator = OnLabelledCollectionProtocol.get_collator(return_type)

     def samples_parameters(self):
         indexes = []
@@ -339,7 +339,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol):
         indexesA, indexesB = indexes
         sampleA = self.A.sampling_from_index(indexesA)
         sampleB = self.B.sampling_from_index(indexesB)
-        return sampleA+sampleB
+        return (sampleA+sampleB).Xp

     def total(self):
         return self.repeats * len(self.mixture_points)
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
index 8d70fe9..b0c2f7a 100644
--- a/quapy/tests/test_datasets.py
+++ b/quapy/tests/test_datasets.py
@@ -46,9 +46,7 @@ def test_fetch_UCIDataset(dataset_name):

 @pytest.mark.parametrize('dataset_name', LEQUA2022_TASKS)
 def test_fetch_lequa2022(dataset_name):
-    fetch_lequa2022(dataset_name)
-    # dataset = fetch_lequa2022(dataset_name)
-    # print(f'Dataset {dataset_name}')
-    #
print('Training set stats')
-    # dataset.training.stats()
-    # print('Test set stats')
\ No newline at end of file
+    train, gen_val, gen_test = fetch_lequa2022(dataset_name)
+    print(train.stats())
+    print('Val:', gen_val.total())
+    print('Test:', gen_test.total())
diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py
index b68567b..aeb1f4e 100644
--- a/quapy/tests/test_protocols.py
+++ b/quapy/tests/test_protocols.py
@@ -12,8 +12,8 @@ def mock_labelled_collection(prefix=''):

 def samples_to_str(protocol):
     samples_str = ""
-    for sample in protocol():
-        samples_str += f'{sample.instances}\t{sample.labels}\t{sample.prevalence()}\n'
+    for instances, prev in protocol():
+        samples_str += f'{instances}\t{prev}\n'
     return samples_str


From 2cc7db60ccad5c1b70b9a8d2e7d492fd6d8b5357 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Tue, 14 Jun 2022 09:35:39 +0200
Subject: [PATCH 10/59] updating parallel policy to take n_jobs from
 environment (not yet tested)

---
 quapy/CHANGE_LOG.txt        |  4 +++-
 quapy/__init__.py           |  9 ++++++++-
 quapy/data/preprocessing.py |  5 +++--
 quapy/method/aggregative.py | 18 ++++++++++--------
 quapy/method/base.py        |  7 ++++---
 quapy/method/meta.py        |  4 ++--
 quapy/model_selection.py    |  7 +++----
 quapy/util.py               |  6 ++++--
 8 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index ab03b01..095bb76 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -18,6 +18,8 @@ procedure. The user can now specify "force", "auto", True or False, in order to actively decide whether to apply it
   or not.

+- n_jobs is now taken from the environment if set to None
+
 Things to fix:
 - clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance():
     this is not working; I don't know how to make the isinstance work. Looks like there is some problem with the
     path of the imported class wrt the path of the class that arrives from another module...
 - clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only
   internally and not imposed in any abstract class)
 - optimize "qp.evaluation.prediction" for aggregative methods (pre-classification)
@@ -29,7 +31,7 @@ Things to fix:
 - Policies should be able to set their output to "labelled_collection" or "instances_prevalence" or something similar.
 - Policies should implement the "gen()" one, taking a reader function as an input, and a folder path maybe
 - Review all documentation, redo the Sphinx doc, update Wikis...
-- Resolve the OneVsAll thing (it is in base.py and in aggregative.py
+- Resolve the OneVsAll thing (it is in base.py and in aggregative.py)
 - Better handle the environment (e.g., with n_jobs)
 - test cross_generate_predictions and cancel cross_generate_predictions_depr
 - Add a proper log?
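As a usage sketch of the new environment-driven policy (illustrative only, based on the get_njobs helper introduced in the diffs below; PACC is just one of the methods whose n_jobs now defaults to None):

    import quapy as qp
    from quapy.method.aggregative import PACC
    from sklearn.linear_model import LogisticRegression

    qp.environ['N_JOBS'] = -1                          # new global default for parallelism

    pacc = PACC(LogisticRegression())                  # n_jobs=None: resolved by qp.get_njobs to -1
    pacc_seq = PACC(LogisticRegression(), n_jobs=1)    # explicit values still take precedence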
diff --git a/quapy/__init__.py b/quapy/__init__.py index 2ef4c5c..54b1603 100644 --- a/quapy/__init__.py +++ b/quapy/__init__.py @@ -18,7 +18,14 @@ environ = { 'UNK_INDEX': 0, 'PAD_TOKEN': '[PAD]', 'PAD_INDEX': 1, - 'SVMPERF_HOME': './svm_perf_quantification' + 'SVMPERF_HOME': './svm_perf_quantification', + 'N_JOBS': 1 } +def get_njobs(n_jobs): + return environ['N_JOBS'] if n_jobs is None else n_jobs + + + + diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py index f04f010..a987900 100644 --- a/quapy/data/preprocessing.py +++ b/quapy/data/preprocessing.py @@ -169,7 +169,7 @@ class IndexTransformer: self.pad = self.add_word(qp.environ['PAD_TOKEN'], qp.environ['PAD_INDEX']) return self - def transform(self, X, n_jobs=-1): + def transform(self, X, n_jobs=None): """ Transforms the strings in `X` as lists of numerical ids @@ -179,6 +179,7 @@ class IndexTransformer: """ # given the number of tasks and the number of jobs, generates the slices for the parallel processes assert self.unk != -1, 'transform called before fit' + n_jobs = qp.get_njobs(n_jobs) indexed = map_parallel(func=self._index, args=X, n_jobs=n_jobs) return np.asarray(indexed) @@ -186,7 +187,7 @@ class IndexTransformer: vocab = self.vocabulary_.copy() return [[vocab.prevalence(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')] - def fit_transform(self, X, n_jobs=-1): + def fit_transform(self, X, n_jobs=None): """ Fits the transform on `X` and transforms it. diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index ca4b25c..c2f4717 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -207,6 +207,8 @@ def cross_generate_predictions( n_jobs ): + n_jobs = qp.get_njobs(n_jobs) + if isinstance(val_split, int): assert fit_learner == True, \ 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' @@ -331,10 +333,10 @@ class ACC(AggregativeQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): self.learner = learner self.val_split = val_split - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -437,10 +439,10 @@ class PACC(AggregativeProbabilisticQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): self.learner = learner self.val_split = val_split - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -769,10 +771,10 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=1): + def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): self.learner = learner self.val_split = val_split - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): self._check_binary(data, "Threshold Optimization") @@ -1022,13 +1024,13 @@ class OneVsAll(AggregativeQuantifier): :param n_jobs: number of parallel workers """ - def __init__(self, binary_quantifier, n_jobs=-1): + def __init__(self, binary_quantifier, n_jobs=None): assert isinstance(self.binary_quantifier, BaseQuantifier), \ f'{self.binary_quantifier} does not seem to be a Quantifier' assert isinstance(self.binary_quantifier, AggregativeQuantifier), \ f'{self.binary_quantifier} does not seem to be of type Aggregative' self.binary_quantifier = binary_quantifier - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_learner=True): assert not data.binary, \ diff --git a/quapy/method/base.py b/quapy/method/base.py index 6c2a0c5..c935735 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -1,6 +1,6 @@ from abc import ABCMeta, abstractmethod from copy import deepcopy - +import quapy as qp from quapy.data import LabelledCollection @@ -63,17 +63,18 @@ class BinaryQuantifier(BaseQuantifier): assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \ f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.' + class OneVsAllGeneric: """ Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1. 
""" - def __init__(self, binary_quantifier, n_jobs=1): + def __init__(self, binary_quantifier, n_jobs=None): assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' self.binary_quantifier = binary_quantifier - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) def fit(self, data: LabelledCollection, **kwargs): assert not data.binary, \ diff --git a/quapy/method/meta.py b/quapy/method/meta.py index d5e8c2a..5e084e5 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -72,7 +72,7 @@ class Ensemble(BaseQuantifier): policy='ave', max_sample_size=None, val_split:Union[qp.data.LabelledCollection, float]=None, - n_jobs=1, + n_jobs=None, verbose=False): assert policy in Ensemble.VALID_POLICIES, \ f'unknown policy={policy}; valid are {Ensemble.VALID_POLICIES}' @@ -84,7 +84,7 @@ class Ensemble(BaseQuantifier): self.red_size = red_size self.policy = policy self.val_split = val_split - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) self.post_proba_fn = None self.verbose = verbose self.max_sample_size = max_sample_size diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 7d71023..c227db8 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -37,7 +37,7 @@ class GridSearchQ(BaseQuantifier): error: Union[Callable, str] = qp.error.mae, refit=True, timeout=-1, - n_jobs=1, + n_jobs=None, verbose=False): self.model = model @@ -45,7 +45,7 @@ class GridSearchQ(BaseQuantifier): self.protocol = protocol self.refit = refit self.timeout = timeout - self.n_jobs = n_jobs + self.n_jobs = qp.get_njobs(n_jobs) self.verbose = verbose self.__check_error(error) assert isinstance(protocol, AbstractProtocol), 'unknown protocol' @@ -76,7 +76,6 @@ class GridSearchQ(BaseQuantifier): params_values = list(self.param_grid.values()) protocol = self.protocol - n_jobs = self.n_jobs self.param_scores_ = {} self.best_score_ = None @@ -84,7 +83,7 @@ class GridSearchQ(BaseQuantifier): tinit = time() hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)] - scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), n_jobs=n_jobs) + scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), n_jobs=self.n_jobs) for params, score, model in scores: if score is not None: diff --git a/quapy/util.py b/quapy/util.py index 952c2da..259178e 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -11,7 +11,7 @@ import numpy as np from joblib import Parallel, delayed -def _get_parallel_slices(n_tasks, n_jobs=-1): +def _get_parallel_slices(n_tasks, n_jobs): if n_jobs == -1: n_jobs = multiprocessing.cpu_count() batch = int(n_tasks / n_jobs) @@ -48,7 +48,9 @@ def parallel(func, args, n_jobs): """ print('n_jobs',n_jobs) def func_dec(environ, *args): - qp.environ = environ + qp.environ = environ.copy() + qp.environ['N_JOBS'] = 1 + print(f'setting n_jobs from {environ["N_JOBS"]} to 1') return func(*args) return Parallel(n_jobs=n_jobs)( delayed(func_dec)(qp.environ, args_i) for args_i in args From 789b9d5fbc963cb0e7e8c01ea1ee2338dc72fe1f Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 15 Jun 2022 14:36:02 +0200 Subject: [PATCH 11/59] pathfix in lequa2022 datasets --- quapy/data/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 06ba3d0..8e58540 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -580,7 +580,7 @@ def 
fetch_lequa2022(task, data_home=None): test_samples_path = join(lequa_dir, task, 'public', 'dev_samples') test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt') - test_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn) + test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn) return train, val_gen, test_gen From c795404e7f0db29939d6b33093799c3482fbc7ab Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Wed, 15 Jun 2022 16:54:42 +0200 Subject: [PATCH 12/59] import fix --- quapy/model_selection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quapy/model_selection.py b/quapy/model_selection.py index c227db8..d627649 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -2,9 +2,9 @@ import itertools import signal from copy import deepcopy from typing import Union, Callable -import evaluation import quapy as qp -from protocol import AbstractProtocol, OnLabelledCollectionProtocol +from quapy import evaluation +from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol from quapy.data.base import LabelledCollection from quapy.method.aggregative import BaseQuantifier from time import time From a7c768bb40b5f2d56743c7b5f9881dd79376346c Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 16 Jun 2022 16:38:34 +0200 Subject: [PATCH 13/59] param fix --- quapy/data/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quapy/data/base.py b/quapy/data/base.py index 4601c15..1125449 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -430,7 +430,7 @@ class Dataset: """ return self.training, self.test - def stats(self, show): + def stats(self, show=True): """ Returns (and eventually prints) a dictionary with some stats of this dataset. 
E.g.,: From c0c37f0a178164aacbb626181a1fe43bd3973d37 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 16 Jun 2022 16:54:15 +0200 Subject: [PATCH 14/59] return type in covariate protocol --- quapy/protocol.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/quapy/protocol.py b/quapy/protocol.py index fec37ca..f8b828f 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -301,7 +301,8 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): repeats=1, prevalence=None, mixture_points=11, - random_seed=None): + random_seed=None, + return_type='sample_prev'): super(CovariateShiftPP, self).__init__(random_seed) self.A = domainA self.B = domainB @@ -322,6 +323,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): assert all(np.logical_and(self.mixture_points >= 0, self.mixture_points<=1)), \ 'mixture_model datatype not understood (expected int or a sequence of real values in [0,1])' self.random_seed = random_seed + self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): indexesA, indexesB = [], [] @@ -339,7 +341,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): indexesA, indexesB = indexes sampleA = self.A.sampling_from_index(indexesA) sampleB = self.B.sampling_from_index(indexesB) - return (sampleA+sampleB).Xp + return self.collator(sampleA+sampleB) def total(self): return self.repeats * len(self.mixture_points) From cf0bd14cf193c13da0328c12b78236b5408072ef Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Fri, 17 Jun 2022 12:51:52 +0200 Subject: [PATCH 15/59] bug fix in covariate shift protocol --- quapy/protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quapy/protocol.py b/quapy/protocol.py index f8b828f..ac9680f 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -341,7 +341,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): indexesA, indexesB = indexes sampleA = self.A.sampling_from_index(indexesA) sampleB = self.B.sampling_from_index(indexesB) - return self.collator(sampleA+sampleB) + return sampleA+sampleB def total(self): return self.repeats * len(self.mixture_points) From f4a2a94ba503ff11b13f61b5401918d1c89f8fd6 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 21 Jun 2022 10:27:06 +0200 Subject: [PATCH 16/59] fixing random_state in base and in protocols --- quapy/data/base.py | 16 ++++++---- quapy/protocol.py | 54 +++++++++++++++++----------------- quapy/tests/test_evaluation.py | 2 +- quapy/tests/test_modsel.py | 8 ++--- quapy/tests/test_protocols.py | 8 ++--- quapy/util.py | 1 - 6 files changed, 47 insertions(+), 42 deletions(-) diff --git a/quapy/data/base.py b/quapy/data/base.py index 1125449..3c9bb67 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -2,7 +2,7 @@ import numpy as np from scipy.sparse import issparse from scipy.sparse import vstack from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold - +from numpy.random import RandomState from quapy.functional import strprev @@ -146,16 +146,21 @@ class LabelledCollection: return indexes_sample - def uniform_sampling_index(self, size): + def uniform_sampling_index(self, size, random_state=None): """ Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn without replacement if the requested size is greater than the number of instances, or with replacement otherwise. :param size: integer, the size of the uniform sample + :param random_state: if specified, guarantees reproducibility of the split. 
:return: a np.ndarray of shape `(size)` with the indexes """ - return np.random.choice(len(self), size, replace=size > len(self)) + if random_state is not None: + ng = RandomState(seed=random_state) + else: + ng = np.random + return ng.choice(len(self), size, replace=size > len(self)) def sampling(self, size, *prevs, shuffle=True): """ @@ -174,16 +179,17 @@ class LabelledCollection: prev_index = self.sampling_index(size, *prevs, shuffle=shuffle) return self.sampling_from_index(prev_index) - def uniform_sampling(self, size): + def uniform_sampling(self, size, random_state=None): """ Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn without replacement if the requested size is greater than the number of instances, or with replacement otherwise. :param size: integer, the requested size + :param random_state: if specified, guarantees reproducibility of the split. :return: an instance of :class:`LabelledCollection` with length == `size` """ - unif_index = self.uniform_sampling_index(size) + unif_index = self.uniform_sampling_index(size, random_state=random_state) return self.sampling_from_index(unif_index) def sampling_from_index(self, index): diff --git a/quapy/protocol.py b/quapy/protocol.py index f8b828f..c232ebc 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -40,22 +40,22 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): needed for extracting the samples, and :meth:`sample` that, given some parameters as input, deterministically generates a sample. - :param seed: the seed for allowing to replicate any sequence of samples. Default is None, meaning that + :param random_state: the seed for allowing to replicate any sequence of samples. Default is None, meaning that the sequence will be different every time the protocol is called. """ - _random_seed = -1 # means "not set" + _random_state = -1 # means "not set" - def __init__(self, seed=None): - self.random_seed = seed + def __init__(self, random_state=None): + self.random_state = random_state @property - def random_seed(self): - return self._random_seed + def random_state(self): + return self._random_state - @random_seed.setter - def random_seed(self, seed): - self._random_seed = seed + @random_state.setter + def random_state(self, random_state): + self._random_state = random_state @abstractmethod def samples_parameters(self): @@ -78,11 +78,11 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): def __call__(self): with ExitStack() as stack: - if self.random_seed == -1: + if self.random_state == -1: raise ValueError('The random seed has never been initialized. 
' 'Set it to None not to impose replicability.') - if self.random_seed is not None: - stack.enter_context(qp.util.temp_seed(self.random_seed)) + if self.random_state is not None: + stack.enter_context(qp.util.temp_seed(self.random_state)) for params in self.samples_parameters(): yield self.collator(self.sample(params)) @@ -132,11 +132,11 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid (default is 21) :param repeats: number of copies for each valid prevalence vector (default is 10) - :param random_seed: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default None) """ - def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_seed=None, return_type='sample_prev'): - super(APP, self).__init__(random_seed) + def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_state=None, return_type='sample_prev'): + super(APP, self).__init__(random_state) self.data = data self.sample_size = sample_size self.n_prevalences = n_prevalences @@ -189,15 +189,15 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param data: a `LabelledCollection` from which the samples will be drawn :param sample_size: integer, the number of instances in each sample :param repeats: the number of samples to generate. Default is 100. - :param random_seed: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default None) """ - def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_seed=None, return_type='sample_prev'): - super(NPP, self).__init__(random_seed) + def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): + super(NPP, self).__init__(random_state) self.data = data self.sample_size = sample_size self.repeats = repeats - self.random_seed = random_seed + self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): @@ -226,15 +226,15 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) :param data: a `LabelledCollection` from which the samples will be drawn :param sample_size: integer, the number of instances in each sample :param repeats: the number of samples to generate. Default is 100. 
- :param random_seed: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default None) """ - def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_seed=None, return_type='sample_prev'): - super(USimplexPP, self).__init__(random_seed) + def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): + super(USimplexPP, self).__init__(random_state) self.data = data self.sample_size = sample_size self.repeats = repeats - self.random_seed = random_seed + self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): @@ -290,7 +290,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself. the specific points - :param random_seed: + :param random_state: """ def __init__( @@ -301,9 +301,9 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): repeats=1, prevalence=None, mixture_points=11, - random_seed=None, + random_state=None, return_type='sample_prev'): - super(CovariateShiftPP, self).__init__(random_seed) + super(CovariateShiftPP, self).__init__(random_state) self.A = domainA self.B = domainB self.sample_size = sample_size @@ -322,7 +322,7 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): self.mixture_points = np.asarray(mixture_points) assert all(np.logical_and(self.mixture_points >= 0, self.mixture_points<=1)), \ 'mixture_model datatype not understood (expected int or a sequence of real values in [0,1])' - self.random_seed = random_seed + self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): diff --git a/quapy/tests/test_evaluation.py b/quapy/tests/test_evaluation.py index 73dc485..9a77867 100644 --- a/quapy/tests/test_evaluation.py +++ b/quapy/tests/test_evaluation.py @@ -12,7 +12,7 @@ class EvalTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) train, test = data.training, data.test - protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=11, repeats=1, random_seed=1) + protocol = qp.protocol.APP(test, sample_size=1000, n_prevalences=11, repeats=1, random_state=1) class SlowLR(LogisticRegression): def predict_proba(self, X): diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py index 9c6604a..d54dcbe 100644 --- a/quapy/tests/test_modsel.py +++ b/quapy/tests/test_modsel.py @@ -21,7 +21,7 @@ class ModselTestCase(unittest.TestCase): training, validation = data.training.split_stratified(0.7, random_state=1) param_grid = {'C': np.logspace(-3,3,7)} - app = APP(validation, sample_size=100, random_seed=1) + app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True ).fit(training) @@ -40,7 +40,7 @@ class ModselTestCase(unittest.TestCase): # test = data.test param_grid = {'C': np.logspace(-3,3,7)} - app = APP(validation, sample_size=100, random_seed=1) + app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True ).fit(training) @@ -62,7 +62,7 @@ class ModselTestCase(unittest.TestCase): training, validation = 
data.training.split_stratified(0.7, random_state=1) param_grid = {'C': np.logspace(-3, 3, 7)} - app = APP(validation, sample_size=100, random_seed=1) + app = APP(validation, sample_size=100, random_state=1) tinit = time.time() GridSearchQ( @@ -96,7 +96,7 @@ class ModselTestCase(unittest.TestCase): # test = data.test param_grid = {'C': np.logspace(-3,3,7)} - app = APP(validation, sample_size=100, random_seed=1) + app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=3, n_jobs=-1, verbose=True ) diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index aeb1f4e..dea3290 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -21,7 +21,7 @@ class TestProtocols(unittest.TestCase): def test_app_replicate(self): data = mock_labelled_collection() - p = APP(data, sample_size=5, n_prevalences=11, random_seed=42) + p = APP(data, sample_size=5, n_prevalences=11, random_state=42) samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -57,7 +57,7 @@ class TestProtocols(unittest.TestCase): def test_npp_replicate(self): data = mock_labelled_collection() - p = NPP(data, sample_size=5, repeats=5, random_seed=42) + p = NPP(data, sample_size=5, repeats=5, random_state=42) samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -75,7 +75,7 @@ class TestProtocols(unittest.TestCase): def test_kraemer_replicate(self): data = mock_labelled_collection() - p = USimplexPP(data, sample_size=5, repeats=10, random_seed=42) + p = USimplexPP(data, sample_size=5, repeats=10, random_state=42) samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -94,7 +94,7 @@ class TestProtocols(unittest.TestCase): def test_covariate_shift_replicate(self): dataA = mock_labelled_collection('domA') dataB = mock_labelled_collection('domB') - p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_seed=1) + p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_state=1) samples1 = samples_to_str(p) samples2 = samples_to_str(p) diff --git a/quapy/util.py b/quapy/util.py index 259178e..049ebed 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -50,7 +50,6 @@ def parallel(func, args, n_jobs): def func_dec(environ, *args): qp.environ = environ.copy() qp.environ['N_JOBS'] = 1 - print(f'setting n_jobs from {environ["N_JOBS"]} to 1') return func(*args) return Parallel(n_jobs=n_jobs)( delayed(func_dec)(qp.environ, args_i) for args_i in args From 8f6aa629b83dc9ca1c689551cb31cb59f5b2c400 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 21 Jun 2022 10:49:30 +0200 Subject: [PATCH 17/59] param seed changed to random_state --- quapy/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quapy/util.py b/quapy/util.py index 049ebed..cb1eab3 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -57,17 +57,17 @@ def parallel(func, args, n_jobs): @contextlib.contextmanager -def temp_seed(seed): +def temp_seed(random_state): """ Can be used in a "with" context to set a temporal seed without modifying the outer numpy's current state. 
E.g.: >>> with temp_seed(random_seed): >>> pass # do any computation depending on np.random functionality - :param seed: the seed to set within the "with" context + :param random_state: the seed to set within the "with" context """ state = np.random.get_state() - np.random.seed(seed) + np.random.seed(random_state) try: yield finally: From cf7d37c7934dba6590f56c876f94643386114d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Tue, 21 Jun 2022 11:07:00 +0200 Subject: [PATCH 18/59] removing log message --- quapy/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/quapy/util.py b/quapy/util.py index cb1eab3..2ccf06d 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -46,7 +46,6 @@ def parallel(func, args, n_jobs): that takes the `quapy.environ` variable as input silently """ - print('n_jobs',n_jobs) def func_dec(environ, *args): qp.environ = environ.copy() qp.environ['N_JOBS'] = 1 From 02dd2846ff4db54a6e6eedb35b15ffc98dad38bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Fri, 24 Jun 2022 14:05:47 +0200 Subject: [PATCH 19/59] changing app to use prevalence_linspace function with smooth limits --- quapy/protocol.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/quapy/protocol.py b/quapy/protocol.py index 69b99ad..7652eeb 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -132,15 +132,17 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the grid (default is 21) :param repeats: number of copies for each valid prevalence vector (default is 10) + :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 :param random_state: allows replicating samples across runs (default None) """ - def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, random_state=None, return_type='sample_prev'): + def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'): super(APP, self).__init__(random_state) self.data = data self.sample_size = sample_size self.n_prevalences = n_prevalences self.repeats = repeats + self.smooth_limits_epsilon = smooth_limits_epsilon self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def prevalence_grid(self): @@ -159,7 +161,7 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): in the grid multiplied by `repeat` """ dimensions = self.data.n_classes - s = np.linspace(0., 1., self.n_prevalences, endpoint=True) + s = F.prevalence_linspace(self.n_prevalences, repeats=1, smooth_limits_epsilon=self.smooth_limits_epsilon) s = [s] * (dimensions - 1) prevs = [p for p in itertools.product(*s, repeat=1) if (sum(p) <= 1.0)] prevs = np.asarray(prevs).reshape(len(prevs), -1) From 750814ef2a4a74e60f5ecd857d784211813d6caf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Fri, 24 Jun 2022 14:20:08 +0200 Subject: [PATCH 20/59] fixing bug in ACC when using cross validation --- quapy/method/aggregative.py | 1 + 1 file changed, 1 insertion(+) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index c2f4717..759a853 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -223,6 +223,7 @@ def cross_generate_predictions( # fit the learner on all data learner.fit(*data.Xy) + y = data.y classes = data.classes_ else: learner, val_data = 
_training_helper(

From 46e294002f3f3fc43149c41f08198ed810a4e33a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Mon, 11 Jul 2022 12:21:49 +0200
Subject: [PATCH 21/59] dys implementation

---
 quapy/functional.py         |  6 +++
 quapy/method/__init__.py    |  1 +
 quapy/method/aggregative.py | 79 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/quapy/functional.py b/quapy/functional.py
index e44dacf..8cf0312 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -78,6 +78,12 @@ def HellingerDistance(P, Q):
     """
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
 
+def TopsoeDistance(P, Q, epsilon=1e-20):
+    """ Topsoe distance
+    """
+    return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) +
+                  Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
+
 
 def uniform_prevalence_sampling(n_classes, size=1):
     """
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
index ddd7b26..8a30451 100644
--- a/quapy/method/__init__.py
+++ b/quapy/method/__init__.py
@@ -19,6 +19,7 @@ AGGREGATIVE_METHODS = {
     aggregative.PACC,
     aggregative.EMQ,
     aggregative.HDy,
+    aggregative.DyS,
     aggregative.X,
     aggregative.T50,
     aggregative.MAX,
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 759a853..ac6fdc3 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -1,6 +1,7 @@
 from abc import abstractmethod
 from copy import deepcopy
-from typing import Union
+import string
+from typing import Callable, Union
 import numpy as np
 from joblib import Parallel, delayed
 from sklearn.base import BaseEstimator
@@ -172,7 +173,7 @@ def _training_helper(learner,
         if isinstance(val_split, float):
             if not (0 < val_split < 1):
                 raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)')
-            train, unused = data.split_stratified(train_prop=1 - val_split)
+            train, unused = data.split_stratified(train_prop=1 - val_split,random_state=0)
         elif isinstance(val_split, LabelledCollection):
             train = data
             unused = val_split
@@ -637,6 +638,80 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
         return np.asarray([1 - class1_prev, class1_prev])
 
 
+class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier):
+    """
+    `DyS framework `_ (DyS).
+    DyS is a generalization of the HDy method, using a ternary search in order to find the prevalence that
+    minimizes the distance between distributions.
+    Details for the ternary search were taken from 
+
+    :param learner: a sklearn's Estimator that generates a binary classifier
+    :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out
+        validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself).
+    :param n_bins: an int with the number of bins to use to compute the histograms.
+    :param distance: a str with a distance already included in the library (HD or topsoe), or a function
+        that computes the distance between two distributions.
+    :param tol: a float with the tolerance for the ternary search algorithm.
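Since `distance` accepts either a predefined name or a callable, a usage sketch (editorial aside; `l1_dist` below is a hypothetical user-defined metric, not part of the patch):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import DyS

    def l1_dist(P, Q):
        # hypothetical custom distance between the two binned distributions
        return np.abs(P - Q).sum()

    dys = DyS(LogisticRegression(), val_split=0.4, n_bins=8, distance=l1_dist, tol=1e-05)
    # built-in alternatives: distance='HD' (Hellinger) or distance='topsoe'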
+ """ + + def __init__(self, learner: BaseEstimator, val_split=0.4, n_bins=8, distance: Union[str, Callable]='HD', tol=1e-05): + self.learner = learner + self.val_split = val_split + self.tol = tol + self.distance = distance + self.n_bins = n_bins + + def _ternary_search(self, f, left, right, tol): + """ + Find maximum of unimodal function f() within [left, right] + """ + while abs(right - left) >= tol: + left_third = left + (right - left) / 3 + right_third = right - (right - left) / 3 + + if f(left_third) > f(right_third): + left = left_third + else: + right = right_third + + # Left and right are the current bounds; the maximum is between them + return (left + right) / 2 + + def _compute_distance(self, Px_train, Px_test, distance: Union[str, Callable]='HD'): + if distance=='HD': + return F.HellingerDistance(Px_train, Px_test) + elif distance=='topsoe': + return F.TopsoeDistance(Px_train, Px_test) + else: + return distance(Px_train, Px_test) + + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): + if val_split is None: + val_split = self.val_split + + self._check_binary(data, self.__class__.__name__) + self.learner, validation = _training_helper( + self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) + Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) + self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] + self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] + self.Pxy1_density = np.histogram(self.Pxy1, bins=self.n_bins, range=(0, 1), density=True)[0] + self.Pxy0_density = np.histogram(self.Pxy0, bins=self.n_bins, range=(0, 1), density=True)[0] + return self + + def aggregate(self, classif_posteriors): + Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) + + Px_test = np.histogram(Px, bins=self.n_bins, range=(0, 1), density=True)[0] + + def distribution_distance(prev): + Px_train = prev * self.Pxy1_density + (1 - prev) * self.Pxy0_density + return self._compute_distance(Px_train,Px_test,self.distance) + + class1_prev = self._ternary_search(f=distribution_distance, left=0, right=1, tol=self.tol) + return np.asarray([1 - class1_prev, class1_prev]) + + class ELM(AggregativeQuantifier, BinaryQuantifier): """ Class of Explicit Loss Minimization (ELM) quantifiers. From ecd0ad7ec7c40db811c045aea0b7ee3c71e97594 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 11 Jul 2022 14:00:25 +0200 Subject: [PATCH 22/59] unit test for replicability based on qp.util.temp_seed --- quapy/method/aggregative.py | 1 + quapy/tests/test_replicability.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 quapy/tests/test_replicability.py diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 759a853..e40e96c 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -438,6 +438,7 @@ class PACC(AggregativeProbabilisticQuantifier): validation data, or as an integer, indicating that the misclassification rates should be estimated via `k`-fold cross validation (this integer stands for the number of folds `k`), or as a :class:`quapy.data.base.LabelledCollection` (the split itself). 
+ :param n_jobs: number of parallel workers """ def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): diff --git a/quapy/tests/test_replicability.py b/quapy/tests/test_replicability.py new file mode 100644 index 0000000..329ac32 --- /dev/null +++ b/quapy/tests/test_replicability.py @@ -0,0 +1,30 @@ +import unittest +import quapy as qp +from quapy.functional import strprev +from sklearn.linear_model import LogisticRegression + +from method.aggregative import PACC + + +class MyTestCase(unittest.TestCase): + def test_replicability(self): + + dataset = qp.datasets.fetch_UCIDataset('yeast') + + with qp.util.temp_seed(0): + lr = LogisticRegression(random_state=0, max_iter=10000) + pacc = PACC(lr) + prev = pacc.fit(dataset.training).quantify(dataset.test.X) + str_prev1 = strprev(prev, prec=5) + + with qp.util.temp_seed(0): + lr = LogisticRegression(random_state=0, max_iter=10000) + pacc = PACC(lr) + prev2 = pacc.fit(dataset.training).quantify(dataset.test.X) + str_prev2 = strprev(prev2, prec=5) + + self.assertEqual(str_prev1, str_prev2) # add assertion here + + +if __name__ == '__main__': + unittest.main() From 428f10fb2d09021b34cc7bf2c8d40199f5943f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Mon, 11 Jul 2022 14:04:28 +0200 Subject: [PATCH 23/59] adding SMM --- quapy/method/aggregative.py | 41 ++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index ac6fdc3..a2e03ae 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -173,7 +173,7 @@ def _training_helper(learner, if isinstance(val_split, float): if not (0 < val_split < 1): raise ValueError(f'train/val split {val_split} out of range, must be in (0,1)') - train, unused = data.split_stratified(train_prop=1 - val_split,random_state=0) + train, unused = data.split_stratified(train_prop=1 - val_split) elif isinstance(val_split, LabelledCollection): train = data unused = val_split @@ -712,6 +712,45 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) +class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): + """ + `SMM method `_ (SMM). + SMM is a simplification of matching distribution methods where the representation of the examples + is created using the mean instead of a histogram. + + :param learner: a sklearn's Estimator that generates a binary classifier. + :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out + validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). 
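A usage sketch for the new SMM quantifier (editorial aside; assumes a binary dataset such as the UCI 'yeast' collection used in the test above):

    import quapy as qp
    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import SMM

    dataset = qp.datasets.fetch_UCIDataset('yeast')
    smm = SMM(LogisticRegression(max_iter=10000), val_split=0.4)
    smm.fit(dataset.training)
    estim_prev = smm.quantify(dataset.test.instances)  # estimated [negative, positive] prevalence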
+ """ + + def __init__(self, learner: BaseEstimator, val_split=0.4): + self.learner = learner + self.val_split = val_split + + def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): + if val_split is None: + val_split = self.val_split + + self._check_binary(data, self.__class__.__name__) + self.learner, validation = _training_helper( + self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) + Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) + self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] + self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] + self.Pxy1_mean = np.mean(self.Pxy1) + self.Pxy0_mean = np.mean(self.Pxy0) + return self + + def aggregate(self, classif_posteriors): + Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) + Px_mean = np.mean(Px) + + class1_prev = (Px_mean - self.Pxy0_mean)/(self.Pxy1_mean - self.Pxy0_mean) + class1_prev = np.clip(class1_prev, 0, 1) + + return np.asarray([1 - class1_prev, class1_prev]) + + class ELM(AggregativeQuantifier, BinaryQuantifier): """ Class of Explicit Loss Minimization (ELM) quantifiers. From c91961cff5b12bc8631602ff367d7d3cce4d2904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Mon, 11 Jul 2022 14:10:04 +0200 Subject: [PATCH 24/59] adding to __init__.py --- quapy/method/__init__.py | 1 + quapy/method/aggregative.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py index 8a30451..01c19bc 100644 --- a/quapy/method/__init__.py +++ b/quapy/method/__init__.py @@ -20,6 +20,7 @@ AGGREGATIVE_METHODS = { aggregative.EMQ, aggregative.HDy, aggregative.DyS, + aggregative.SMM, aggregative.X, aggregative.T50, aggregative.MAX, diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index a2e03ae..7ab73fb 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1,6 +1,5 @@ from abc import abstractmethod from copy import deepcopy -import string from typing import Callable, Union import numpy as np from joblib import Parallel, delayed From a4584b79dbf30517f86effccc6208ed28c36d396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Mon, 11 Jul 2022 16:27:02 +0200 Subject: [PATCH 25/59] changing gridsearchQ to ensure reproducibility --- quapy/model_selection.py | 3 ++- quapy/util.py | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/quapy/model_selection.py b/quapy/model_selection.py index d627649..41a7a19 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -83,7 +83,8 @@ class GridSearchQ(BaseQuantifier): tinit = time() hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)] - scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), n_jobs=self.n_jobs) + #pass a seed to parallel so it is set in clild processes + scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs) for params, score, model in scores: if score is not None: diff --git a/quapy/util.py b/quapy/util.py index 2ccf06d..94187e6 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -5,6 +5,7 @@ import os import pickle import urllib from pathlib import Path +from contextlib import ExitStack import quapy as qp import numpy as np @@ -36,7 +37,7 @@ def map_parallel(func, args, n_jobs): return 
list(itertools.chain.from_iterable(results)) -def parallel(func, args, n_jobs): +def parallel(func, args, n_jobs, seed = None): """ A wrapper of multiprocessing: @@ -44,14 +45,20 @@ def parallel(func, args, n_jobs): >>> delayed(func)(args_i) for args_i in args >>> ) - that takes the `quapy.environ` variable as input silently + that takes the `quapy.environ` variable as input silently. + Seeds the child processes to ensure reproducibility when n_jobs>1 """ - def func_dec(environ, *args): + def func_dec(environ, seed, *args): qp.environ = environ.copy() qp.environ['N_JOBS'] = 1 - return func(*args) + #set a context with a temporal seed to ensure results are reproducibles in parallel + with ExitStack() as stack: + if seed is not None: + stack.enter_context(qp.util.temp_seed(seed)) + return func(*args) + return Parallel(n_jobs=n_jobs)( - delayed(func_dec)(qp.environ, args_i) for args_i in args + delayed(func_dec)(qp.environ, None if seed is None else seed+i, args_i) for i, args_i in enumerate(args) ) @@ -66,6 +73,8 @@ def temp_seed(random_state): :param random_state: the seed to set within the "with" context """ state = np.random.get_state() + #save the seed just in case is needed (for instance for setting the seed to child processes) + qp.environ['_R_SEED'] = random_state np.random.seed(random_state) try: yield From f2550fdb829b8c324053af488fc5019dadd70537 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 4 Nov 2022 15:04:36 +0100 Subject: [PATCH 26/59] full example of training, model selection, and evaluation using the lequa2022 dataset with the new protocols --- examples/lequa2022_experiments.py | 26 ++++++++++++++++++++++++++ quapy/data/datasets.py | 17 ++++++++++++++++- quapy/error.py | 10 +++++----- quapy/evaluation.py | 2 +- 4 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 examples/lequa2022_experiments.py diff --git a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py new file mode 100644 index 0000000..790e2c1 --- /dev/null +++ b/examples/lequa2022_experiments.py @@ -0,0 +1,26 @@ +import numpy as np +from sklearn.linear_model import LogisticRegression +import quapy as qp +from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022 +from evaluation import evaluation_report +from method.aggregative import EMQ +from model_selection import GridSearchQ + + +task = 'T1A' + +qp.environ['SAMPLE_SIZE']=LEQUA2022_SAMPLE_SIZE[task] +training, val_generator, test_generator = fetch_lequa2022(task=task) + +# define the quantifier +quantifier = EMQ(learner=LogisticRegression()) + +# model selection +param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]} +model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, n_jobs=-1, refit=False, verbose=True) +quantifier = model_selection.fit(training) + +# evaluation +report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae'], verbose=True) + +print(report) diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 8e58540..b35343b 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -12,6 +12,7 @@ from quapy.data.preprocessing import text2tfidf, reduce_columns from quapy.data.reader import * from quapy.util import download_file_if_not_exists, download_file, get_quapy_home, pickled_resource + REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb'] TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders', 'semeval13', 'semeval14', 'semeval15', 'semeval16', @@ -45,6 +46,20 @@ UCI_DATASETS = ['acute.a', 
'acute.b', LEQUA2022_TASKS = ['T1A', 'T1B', 'T2A', 'T2B'] +_TXA_SAMPLE_SIZE = 250 +_TXB_SAMPLE_SIZE = 1000 + +LEQUA2022_SAMPLE_SIZE = { + 'TXA': _TXA_SAMPLE_SIZE, + 'TXB': _TXB_SAMPLE_SIZE, + 'T1A': _TXA_SAMPLE_SIZE, + 'T1B': _TXB_SAMPLE_SIZE, + 'T2A': _TXA_SAMPLE_SIZE, + 'T2B': _TXB_SAMPLE_SIZE, + 'binary': _TXA_SAMPLE_SIZE, + 'multiclass': _TXB_SAMPLE_SIZE +} + def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False) -> Dataset: """ @@ -578,7 +593,7 @@ def fetch_lequa2022(task, data_home=None): val_true_prev_path = join(lequa_dir, task, 'public', 'dev_prevalences.txt') val_gen = SamplesFromDir(val_samples_path, val_true_prev_path, load_fn=load_fn) - test_samples_path = join(lequa_dir, task, 'public', 'dev_samples') + test_samples_path = join(lequa_dir, task, 'public', 'test_samples') test_true_prev_path = join(lequa_dir, task, 'public', 'test_prevalences.txt') test_gen = SamplesFromDir(test_samples_path, test_true_prev_path, load_fn=load_fn) diff --git a/quapy/error.py b/quapy/error.py index 3375470..2047929 100644 --- a/quapy/error.py +++ b/quapy/error.py @@ -11,11 +11,11 @@ def from_name(err_name): """ assert err_name in ERROR_NAMES, f'unknown error {err_name}' callable_error = globals()[err_name] - if err_name in QUANTIFICATION_ERROR_SMOOTH_NAMES: - eps = __check_eps() - def bound_callable_error(y_true, y_pred): - return callable_error(y_true, y_pred, eps) - return bound_callable_error + # if err_name in QUANTIFICATION_ERROR_SMOOTH_NAMES: + # eps = __check_eps() + # def bound_callable_error(y_true, y_pred): + # return callable_error(y_true, y_pred, eps) + # return bound_callable_error return callable_error diff --git a/quapy/evaluation.py b/quapy/evaluation.py index 57c2ed1..95193aa 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -41,7 +41,7 @@ def prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup=' def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=False): true_prevs, estim_prevs = [], [] - for sample_instances, sample_prev in tqdm(protocol(), total=protocol.total()) if verbose else protocol(): + for sample_instances, sample_prev in tqdm(protocol(), total=protocol.total(), desc='predicting') if verbose else protocol(): estim_prevs.append(quantification_fn(sample_instances)) true_prevs.append(sample_prev) From 6cb9f388e0ecc05ed5f19cdb8bfee147237b04c5 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 4 Nov 2022 15:06:08 +0100 Subject: [PATCH 27/59] full example of training, model selection, and evaluation using the lequa2022 dataset with the new protocols --- examples/lequa2022_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py index 790e2c1..91849e5 100644 --- a/examples/lequa2022_experiments.py +++ b/examples/lequa2022_experiments.py @@ -9,7 +9,7 @@ from model_selection import GridSearchQ task = 'T1A' -qp.environ['SAMPLE_SIZE']=LEQUA2022_SAMPLE_SIZE[task] +qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task] training, val_generator, test_generator = fetch_lequa2022(task=task) # define the quantifier From eafc82c96a943f43a3207665ab93f2c831ce81a9 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 4 Nov 2022 15:15:12 +0100 Subject: [PATCH 28/59] full example of training, model selection, and evaluation using the lequa2022 dataset with the new protocols --- examples/lequa2022_experiments.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py index 91849e5..0df7d15 100644 --- a/examples/lequa2022_experiments.py +++ b/examples/lequa2022_experiments.py @@ -5,6 +5,7 @@ from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022 from evaluation import evaluation_report from method.aggregative import EMQ from model_selection import GridSearchQ +import pandas as pd task = 'T1A' @@ -21,6 +22,8 @@ model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, n_ quantifier = model_selection.fit(training) # evaluation -report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae'], verbose=True) +report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True) +pd.set_option('display.max_columns', None) +pd.set_option('display.width', 1000) print(report) From fb79a292042ad71e9a694224c744a61e1ab55a65 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 8 Nov 2022 16:36:52 +0100 Subject: [PATCH 29/59] todos and change log --- TODO.txt | 3 ++- quapy/CHANGE_LOG.txt | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/TODO.txt b/TODO.txt index c20e901..90f3301 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,7 +1,8 @@ sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.) make truly parallel the GridSearchQ -abstract protocols +make more examples in the "examples" directory +merge with master, because I had to fix some problems with QuaNet due to an issue notified via GitHub! Packaging: diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 095bb76..6bef8b0 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -9,7 +9,7 @@ - ACC, PACC, Forman's threshold variants have been parallelized. -- Exploration of hyperparameters in Model selection can now be run in parallel (it was a n_jobs argument in +- Exploration of hyperparameters in Model selection can now be run in parallel (there was a n_jobs argument in QuaPy 0.1.6 but only the evaluation part for one specific hyperparameter was run in parallel). - The prediction function has been refactored, so it applies the optimization for aggregative quantifiers (that @@ -20,6 +20,8 @@ - n_jobs is now taken from the environment if set to None +- examples directory created! + Things to fix: - clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance(): this is not working; I don't know how to make the isinstance work. 
Looks like there is some problem with the From 643a19228bc073e2a74c74967fddf18e67caefd6 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 28 Nov 2022 12:02:08 +0100 Subject: [PATCH 30/59] data reader for lequa 2022 competition --- quapy/data/_lequa2022.py | 169 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 quapy/data/_lequa2022.py diff --git a/quapy/data/_lequa2022.py b/quapy/data/_lequa2022.py new file mode 100644 index 0000000..79ccccc --- /dev/null +++ b/quapy/data/_lequa2022.py @@ -0,0 +1,169 @@ +from typing import Tuple, Union +import pandas as pd +import numpy as np +import os + +from quapy.protocol import AbstractProtocol + +DEV_SAMPLES = 1000 +TEST_SAMPLES = 5000 + +ERROR_TOL = 1E-3 + + +def load_category_map(path): + cat2code = {} + with open(path, 'rt') as fin: + for line in fin: + category, code = line.split() + cat2code[category] = int(code) + code2cat = [cat for cat, code in sorted(cat2code.items(), key=lambda x: x[1])] + return cat2code, code2cat + + +def load_raw_documents(path): + df = pd.read_csv(path) + documents = list(df["text"].values) + labels = None + if "label" in df.columns: + labels = df["label"].values.astype(np.int) + return documents, labels + + +def load_vector_documents(path): + D = pd.read_csv(path).to_numpy(dtype=np.float) + labelled = D.shape[1] == 301 + if labelled: + X, y = D[:, 1:], D[:, 0].astype(np.int).flatten() + else: + X, y = D, None + return X, y + + +class SamplesFromDir(AbstractProtocol): + + def __init__(self, path_dir:str, ground_truth_path:str, load_fn): + self.path_dir = path_dir + self.load_fn = load_fn + self.true_prevs = ResultSubmission.load(ground_truth_path) + + def __call__(self): + for id, prevalence in self.true_prevs.iterrows(): + sample, _ = self.load_fn(os.path.join(self.path_dir, f'{id}.txt')) + yield sample, prevalence + + +class ResultSubmission: + + def __init__(self): + self.df = None + + def __init_df(self, categories: int): + if not isinstance(categories, int) or categories < 2: + raise TypeError('wrong format for categories: an int (>=2) was expected') + df = pd.DataFrame(columns=list(range(categories))) + df.index.set_names('id', inplace=True) + self.df = df + + @property + def n_categories(self): + return len(self.df.columns.values) + + def add(self, sample_id: int, prevalence_values: np.ndarray): + if not isinstance(sample_id, int): + raise TypeError(f'error: expected int for sample_sample, found {type(sample_id)}') + if not isinstance(prevalence_values, np.ndarray): + raise TypeError(f'error: expected np.ndarray for prevalence_values, found {type(prevalence_values)}') + if self.df is None: + self.__init_df(categories=len(prevalence_values)) + if sample_id in self.df.index.values: + raise ValueError(f'error: prevalence values for "{sample_id}" already added') + if prevalence_values.ndim != 1 and prevalence_values.size != self.n_categories: + raise ValueError(f'error: wrong shape found for prevalence vector {prevalence_values}') + if (prevalence_values < 0).any() or (prevalence_values > 1).any(): + raise ValueError(f'error: prevalence values out of range [0,1] for "{sample_id}"') + if np.abs(prevalence_values.sum() - 1) > ERROR_TOL: + raise ValueError(f'error: prevalence values do not sum up to one for "{sample_id}"' + f'(error tolerance {ERROR_TOL})') + + self.df.loc[sample_id] = prevalence_values + + def __len__(self): + return len(self.df) + + @classmethod + def load(cls, path: str) -> 'ResultSubmission': + df = ResultSubmission.check_file_format(path) + r = 
ResultSubmission()
+        r.df = df
+        return r
+
+    def dump(self, path: str):
+        ResultSubmission.check_dataframe_format(self.df)
+        self.df.to_csv(path)
+
+    def prevalence(self, sample_id: int):
+        sel = self.df.loc[sample_id]
+        if sel.empty:
+            return None
+        else:
+            return sel.values.flatten()
+
+    def iterrows(self):
+        for index, row in self.df.iterrows():
+            prevalence = row.values.flatten()
+            yield index, prevalence
+
+    @classmethod
+    def check_file_format(cls, path) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]:
+        try:
+            df = pd.read_csv(path, index_col=0)
+        except Exception as e:
+            print(f'the file {path} does not seem to be a valid csv file.')
+            raise  # re-raised, since df would otherwise be undefined in the return below
+        return ResultSubmission.check_dataframe_format(df, path=path)
+
+    @classmethod
+    def check_dataframe_format(cls, df, path=None) -> Union[pd.DataFrame, Tuple[pd.DataFrame, str]]:
+        hint_path = ''  # if given, show the data path in the error message
+        if path is not None:
+            hint_path = f' in {path}'
+
+        if df.index.name != 'id' or len(df.columns) < 2:
+            raise ValueError(f'wrong header{hint_path}, '
+                             f'the format of the header should be "id,0,...,n-1", '
+                             f'where n is the number of categories')
+        if [int(ci) for ci in df.columns.values] != list(range(len(df.columns))):
+            raise ValueError(f'wrong header{hint_path}, category ids should be 0,1,2,...,n-1, '
+                             f'where n is the number of categories')
+        if df.empty:
+            raise ValueError(f'error{hint_path}: results file is empty')
+        elif len(df) != DEV_SAMPLES and len(df) != TEST_SAMPLES:
+            raise ValueError(f'wrong number of prevalence values found{hint_path}; '
+                             f'expected {DEV_SAMPLES} for development sets and '
+                             f'{TEST_SAMPLES} for test sets; found {len(df)}')
+
+        ids = set(df.index.values)
+        expected_ids = set(range(len(df)))
+        if ids != expected_ids:
+            missing = expected_ids - ids
+            if missing:
+                raise ValueError(f'there are {len(missing)} missing ids{hint_path}: {sorted(missing)}')
+            unexpected = ids - expected_ids
+            if unexpected:
+                raise ValueError(f'there are {len(unexpected)} unexpected ids{hint_path}: {sorted(unexpected)}')
+
+        for category_id in df.columns:
+            if (df[category_id] < 0).any() or (df[category_id] > 1).any():
+                raise ValueError(f'error{hint_path} column "{category_id}" contains values out of range [0,1]')
+
+        prevs = df.values
+        round_errors = np.abs(prevs.sum(axis=-1) - 1.)
> ERROR_TOL + if round_errors.any(): + raise ValueError(f'warning: prevalence values in rows with id {np.where(round_errors)[0].tolist()} ' + f'do not sum up to 1 (error tolerance {ERROR_TOL}), ' + f'probably due to some rounding errors.') + + return df + + From eb860e9678c396d5ce5bcba703fb8e36a4ad0403 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 12 Dec 2022 09:34:09 +0100 Subject: [PATCH 31/59] adding the possibility to estimate the training prevalence, instead of using the true training prevalence, as a starting point in emq --- examples/lequa2022_experiments.py | 13 ++++++++++--- quapy/method/aggregative.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py index 0df7d15..31ec651 100644 --- a/examples/lequa2022_experiments.py +++ b/examples/lequa2022_experiments.py @@ -1,6 +1,8 @@ import numpy as np +from sklearn.calibration import CalibratedClassifierCV from sklearn.linear_model import LogisticRegression import quapy as qp +import quapy.functional as F from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022 from evaluation import evaluation_report from method.aggregative import EMQ @@ -14,7 +16,8 @@ qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task] training, val_generator, test_generator = fetch_lequa2022(task=task) # define the quantifier -quantifier = EMQ(learner=LogisticRegression()) +learner = CalibratedClassifierCV(LogisticRegression()) +quantifier = EMQ(learner=learner) # model selection param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]} @@ -24,6 +27,10 @@ quantifier = model_selection.fit(training) # evaluation report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True) -pd.set_option('display.max_columns', None) -pd.set_option('display.width', 1000) +# printing results +pd.set_option('display.expand_frame_repr', False) +report['estim-prev'] = report['estim-prev'].map(F.strprev) print(report) + +print('Averaged values:') +print(report.mean()) diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 19d365b..202b5dd 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -501,17 +501,25 @@ class EMQ(AggregativeProbabilisticQuantifier): maximum-likelihood estimation, in a mutually recursive way, until convergence. :param learner: a sklearn's Estimator that generates a classifier + :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence; + or set to False for computing the training prevalence as an estimate, akin to PCC, i.e., as the expected + value of the posterior probabilities of the trianing documents as suggested in + `Alexandari et al. 
paper `_: """ MAX_ITER = 1000 EPSILON = 1e-4 - def __init__(self, learner: BaseEstimator): + def __init__(self, learner: BaseEstimator, exact_train_prev=True): self.learner = learner + self.exact_train_prev = exact_train_prev def fit(self, data: LabelledCollection, fit_learner=True): self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True) - self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) + if self.exact_train_prev: + self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) + else: + self.train_prevalence = PCC(learner=self.learner).fit(data, fit_learner=False).quantify(data.X) return self def aggregate(self, classif_posteriors, epsilon=EPSILON): From c20d9d5ea415d1fd67551f9744f797f840e20221 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 12 Dec 2022 17:32:30 +0100 Subject: [PATCH 32/59] the heuristic exact_train_prev is performed via kFCV, using a new function qp.model_selection.cross_val_predict --- TODO.txt | 2 ++ quapy/method/aggregative.py | 11 ++++++++--- quapy/model_selection.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/TODO.txt b/TODO.txt index 90f3301..6cef78c 100644 --- a/TODO.txt +++ b/TODO.txt @@ -3,6 +3,8 @@ clean all the cumbersome methods that have to be implemented for new quantifiers make truly parallel the GridSearchQ make more examples in the "examples" directory merge with master, because I had to fix some problems with QuaNet due to an issue notified via GitHub! +added cross_val_predict in qp.model_selection (i.e., a cross_val_predict for quantification) --would be nice to have + it parallelized Packaging: diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 202b5dd..4cec2cd 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -3,7 +3,7 @@ from copy import deepcopy from typing import Callable, Union import numpy as np from joblib import Parallel, delayed -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, clone from sklearn.calibration import CalibratedClassifierCV from sklearn.metrics import confusion_matrix from sklearn.model_selection import StratifiedKFold, cross_val_predict @@ -503,7 +503,7 @@ class EMQ(AggregativeProbabilisticQuantifier): :param learner: a sklearn's Estimator that generates a classifier :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence; or set to False for computing the training prevalence as an estimate, akin to PCC, i.e., as the expected - value of the posterior probabilities of the trianing documents as suggested in + value of the posterior probabilities of the training instances as suggested in `Alexandari et al. 
paper `_: """ @@ -519,7 +519,12 @@ class EMQ(AggregativeProbabilisticQuantifier): if self.exact_train_prev: self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) else: - self.train_prevalence = PCC(learner=self.learner).fit(data, fit_learner=False).quantify(data.X) + self.train_prevalence = qp.model_selection.cross_val_predict( + quantifier=PCC(clone(self.learner)), + data=data, + nfolds=3, + random_state=0 + ) return self def aggregate(self, classif_posteriors, epsilon=EPSILON): diff --git a/quapy/model_selection.py b/quapy/model_selection.py index 41a7a19..f7c5b94 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -2,6 +2,10 @@ import itertools import signal from copy import deepcopy from typing import Union, Callable + +import numpy as np +from sklearn import clone + import quapy as qp from quapy import evaluation from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol @@ -187,3 +191,28 @@ class GridSearchQ(BaseQuantifier): raise ValueError('best_model called before fit') + + +def cross_val_predict(quantifier: BaseQuantifier, data: LabelledCollection, nfolds=3, random_state=0): + """ + Akin to `scikit-learn's cross_val_predict `_ + but for quantification. + + :param quantifier: a quantifier issuing class prevalence values + :param data: a labelled collection + :param nfolds: number of folds for k-fold cross validation generation + :param random_state: random seed for reproducibility + :return: a vector of class prevalence values + """ + + total_prev = np.zeros(shape=data.n_classes) + + for train, test in data.kFCV(nfolds=nfolds, random_state=random_state): + quantifier.fit(train) + fold_prev = quantifier.quantify(test.X) + rel_size = len(test.X)/len(data) + total_prev += fold_prev*rel_size + + return total_prev + + From bb7a77c7c094f847020d54d6b12fb47f5ae44de7 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 13 Dec 2022 16:57:11 +0100 Subject: [PATCH 33/59] missing param in documentation of some protocols --- quapy/protocol.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/quapy/protocol.py b/quapy/protocol.py index 7652eeb..b30165f 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -134,6 +134,8 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param repeats: number of copies for each valid prevalence vector (default is 10) :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 :param random_state: allows replicating samples across runs (default None) + :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or + to "labelled_collection" to get instead instances of LabelledCollection """ def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'): @@ -192,6 +194,8 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param sample_size: integer, the number of instances in each sample :param repeats: the number of samples to generate. Default is 100. 
:param random_state: allows replicating samples across runs (default None) + :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or + to "labelled_collection" to get instead instances of LabelledCollection """ def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): @@ -229,6 +233,8 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) :param sample_size: integer, the number of instances in each sample :param repeats: the number of samples to generate. Default is 100. :param random_state: allows replicating samples across runs (default None) + :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or + to "labelled_collection" to get instead instances of LabelledCollection """ def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): From 8b0b9f522a4babf4d52ec82bd7a7058deaa43e45 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 16 Jan 2023 13:51:29 +0100 Subject: [PATCH 34/59] some bugfixes, unittest and minor changes --- quapy/CHANGE_LOG.txt | 12 ++++++++++++ quapy/data/_lequa2022.py | 6 +++--- quapy/data/base.py | 37 +++++++++++++++++++++++++++++++++++-- quapy/data/reader.py | 4 ++-- quapy/functional.py | 2 +- quapy/method/aggregative.py | 8 ++++++-- quapy/plot.py | 2 +- quapy/util.py | 3 ++- 8 files changed, 62 insertions(+), 12 deletions(-) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 6bef8b0..06d7dc4 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -22,6 +22,18 @@ - examples directory created! +- cross_val_predict (for quantification) added to model_selection: would be nice to allow the user specifies a + test protocol maybe, or None for bypassing it? + +- I think Pablo added DyS, Topsoe distance and binary search. + +- I think Pablo added multi-thread reproducibility. + +- Bugfix: adding two labelled collections (with +) now checks for consistency in the classes + +- newer versions of numpy raise a warning when accessing types (e.g., np.float). I have replaced all such instances + with the plain python type (e.g., float). + Things to fix: - clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance(): this is not working; I don't know how to make the isinstance work. 
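A usage sketch (editorial aside) for the cross_val_predict function added to qp.model_selection in the patch above, mirroring how EMQ now invokes it:

    import quapy as qp
    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import PCC

    data = qp.datasets.fetch_UCIDataset('yeast').training
    estim_prev = qp.model_selection.cross_val_predict(PCC(LogisticRegression()), data, nfolds=3, random_state=0)
    print(qp.functional.strprev(estim_prev))  # aggregated estimate across the 3 folds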
Looks like there is some problem with the diff --git a/quapy/data/_lequa2022.py b/quapy/data/_lequa2022.py index 79ccccc..449eab6 100644 --- a/quapy/data/_lequa2022.py +++ b/quapy/data/_lequa2022.py @@ -26,15 +26,15 @@ def load_raw_documents(path): documents = list(df["text"].values) labels = None if "label" in df.columns: - labels = df["label"].values.astype(np.int) + labels = df["label"].values.astype(int) return documents, labels def load_vector_documents(path): - D = pd.read_csv(path).to_numpy(dtype=np.float) + D = pd.read_csv(path).to_numpy(dtype=float) labelled = D.shape[1] == 301 if labelled: - X, y = D[:, 1:], D[:, 0].astype(np.int).flatten() + X, y = D[:, 1:], D[:, 0].astype(int).flatten() else: X, y = D, None return X, y diff --git a/quapy/data/base.py b/quapy/data/base.py index 3c9bb67..62f871d 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -1,3 +1,5 @@ +from functools import cached_property + import numpy as np from scipy.sparse import issparse from scipy.sparse import vstack @@ -223,13 +225,44 @@ class LabelledCollection: test = LabelledCollection(te_docs, te_labels, classes_=self.classes_) return training, test + + def split_random(self, train_prop=0.6, random_state=None): + """ + Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired + proportion. + + :param train_prop: the proportion of elements to include in the left-most returned collection (typically used + as the training collection). The rest of elements are included in the right-most returned collection + (typically used as a test collection). + :param random_state: if specified, guarantees reproducibility of the split. + :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the + second one with `1-train_prop` elements + """ + indexes = np.random.RandomState(seed=random_state).permutation(len(self)) + if isinstance(train_prop, int): + assert train_prop < len(self), \ + 'argument train_prop cannot be greater than the number of elements in the collection' + splitpoint = train_prop + elif isinstance(train_prop, float): + assert 0 < train_prop < 1, \ + 'argument train_prop out of range (0,1)' + splitpoint = int(np.round(len(self)*train_prop)) + left, right = indexes[:splitpoint], indexes[splitpoint:] + training = self.sampling_from_index(left) + test = self.sampling_from_index(right) + return training, test + def __add__(self, other): """ - Returns a new :class:`LabelledCollection` as the union of this collection with another collection + Returns a new :class:`LabelledCollection` as the union of this collection with another collection. + Both labelled collections must have the same classes. 
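A sketch contrasting the new split_random with the (now class-checked) union operator (editorial aside; it assumes, as in quapy's API, that sampling_from_index preserves the parent's classes_, so both halves share the same classes):

    import numpy as np
    from quapy.data.base import LabelledCollection

    X, y = np.random.rand(100, 2), np.random.randint(0, 2, 100)
    data = LabelledCollection(X, y)
    left, right = data.split_random(train_prop=0.6, random_state=0)
    assert len(left) == 60 and len(right) == 40
    whole = left + right  # ok: same classes; joining collections with different classes now raises NotImplementedError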
:param other: another :class:`LabelledCollection`
         :return: a :class:`LabelledCollection` representing the union of both collections
         """
+        if other is not None and not all(np.sort(self.classes_)==np.sort(other.classes_)):
+            raise NotImplementedError('unsupported operation for collections on different classes')
+
         if other is None:
             return self
         elif issparse(self.instances) and issparse(other.instances):
@@ -241,7 +274,7 @@ class LabelledCollection:
         else:
             raise NotImplementedError('unsupported operation for collection types')
         labels = np.concatenate([self.labels, other.labels])
-        return LabelledCollection(join_instances, labels)
+        return LabelledCollection(join_instances, labels, classes_=self.classes_)

     @property
     def Xy(self):
diff --git a/quapy/data/reader.py b/quapy/data/reader.py
index 8f8bc79..88791e3 100644
--- a/quapy/data/reader.py
+++ b/quapy/data/reader.py
@@ -102,7 +102,7 @@ def reindex_labels(y):
     y = np.asarray(y)
     classnames = np.asarray(sorted(np.unique(y)))
     label2index = {label: index for index, label in enumerate(classnames)}
-    indexed = np.empty(y.shape, dtype=np.int)
+    indexed = np.empty(y.shape, dtype=int)
     for label in classnames:
         indexed[y==label] = label2index[label]
     return indexed, classnames
@@ -121,7 +121,7 @@ def binarize(y, pos_class):
         0 otherwise
     """
     y = np.asarray(y)
-    ybin = np.zeros(y.shape, dtype=np.int)
+    ybin = np.zeros(y.shape, dtype=int)
     ybin[y == pos_class] = 1
     return ybin

diff --git a/quapy/functional.py b/quapy/functional.py
index 8cf0312..3ee46ff 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -39,7 +39,7 @@ def prevalence_from_labels(labels, classes):
         raise ValueError(f'param labels does not seem to be a ndarray of label predictions')
     unique, counts = np.unique(labels, return_counts=True)
     by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
-    prevalences = np.asarray([by_class[class_] for class_ in classes], dtype=np.float)
+    prevalences = np.asarray([by_class[class_] for class_ in classes], dtype=float)
     prevalences /= prevalences.sum()
     return prevalences

diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 4cec2cd..57c821d 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -132,7 +132,11 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):

     def set_params(self, **parameters):
         if isinstance(self.learner, CalibratedClassifierCV):
-            parameters = {'base_estimator__' + k: v for k, v in parameters.items()}
+            if self.learner.get_params().get('base_estimator') == 'deprecated':
+                key_prefix = 'estimator__'  # this has changed in the newer versions of sklearn
+            else:
+                key_prefix = 'base_estimator__'
+            parameters = {key_prefix + k: v for k, v in parameters.items()}
         self.learner.set_params(**parameters)

@@ -369,7 +373,7 @@ class ACC(AggregativeQuantifier):
         # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
         # document that belongs to yj ends up being classified as belonging to yi
         conf = confusion_matrix(y, y_, labels=classes).T
-        conf = conf.astype(np.float)
+        conf = conf.astype(float)
         class_counts = conf.sum(axis=0)
         for i, _ in enumerate(classes):
             if class_counts[i] == 0:
diff --git a/quapy/plot.py b/quapy/plot.py
index cdb9b1e..67ccd52 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -370,7 +370,7 @@ def brokenbar_supremacy_by_drift(method_names, true_prevs, estim_prevs, tr_prevs
         bins[-1] += 0.001

         # we use this to keep track of how many datapoints contribute to each bin
-        inds_histogram_global = np.zeros(n_bins, dtype=np.float)
+        inds_histogram_global = np.zeros(n_bins, dtype=float)

         n_methods = len(method_order)
         buckets = np.zeros(shape=(n_methods, n_bins, 3))
         for i, method in enumerate(method_order):
diff --git a/quapy/util.py b/quapy/util.py
index 94187e6..50a640d 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -23,7 +23,8 @@ def _get_parallel_slices(n_tasks, n_jobs):
 def map_parallel(func, args, n_jobs):
     """
     Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then
-    func is applied in two parallel processes to args[0:50] and to args[50:99]
+    func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function
+    that already works with a list of arguments.

     :param func: function to be parallelized
     :param args: array-like of arguments to be passed to the function in different parallel calls

From 948f63fade88bee27b545e12bd8ce667a192d9ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Mon, 16 Jan 2023 17:00:24 +0100
Subject: [PATCH 35/59] updating plot to center it better

---
 quapy/plot.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index cdb9b1e..7d032f1 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -212,6 +212,7 @@ def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=N

 def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
                    n_bins=20, error_name='ae', show_std=False,
                    show_density=True,
+                   show_legend=True,
                    logscale=False,
                    title=f'Quantification error as a function of distribution shift',
                    vlines=None,
@@ -234,6 +235,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")
     :param show_std: whether or not to show standard deviations as color bands (default is False)
     :param show_density: whether or not to display the distribution of experiments for each bin (default is True)
+    :param show_legend: whether or not to display the legend of the chart (default is True)
     :param logscale: whether or not to log-scale the y-error measure (default is False)
     :param title: title of the plot (default is "Quantification error as a function of distribution shift")
     :param vlines: array-like list of values (default is None).
If indicated, highlights some regions of the space
@@ -306,7 +308,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     if vlines:
         for vline in vlines:
             ax.axvline(vline, 0, 1, linestyle='--', color='k')
-    ax.set_xlim(0, max_x)
+
+    if not show_legend:
+        ax.get_legend().remove()
+
+    ax.set_xlim(min_x, max_x)
     ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

     _save_or_show(savepath)

From 7bcf8b24e9e08b05c3390f3667cb3482dfe93c1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Mon, 16 Jan 2023 17:17:02 +0100
Subject: [PATCH 36/59] fixing bug

---
 quapy/plot.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index 7d032f1..794fd4c 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -309,11 +309,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
         for vline in vlines:
             ax.axvline(vline, 0, 1, linestyle='--', color='k')

-    if not show_legend:
-        ax.get_legend().remove()

     ax.set_xlim(min_x, max_x)
-    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+
+    if not show_legend:
+        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

     _save_or_show(savepath)

From c888346fcffd1f4f40843615148e6519a6e6419c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Tue, 17 Jan 2023 11:03:52 +0100
Subject: [PATCH 37/59] solving a bug in show_legend

---
 quapy/plot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index 794fd4c..c1a857e 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -312,7 +312,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,

     ax.set_xlim(min_x, max_x)

-    if not show_legend:
+    if show_legend:
         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

     _save_or_show(savepath)

From 6e910075ab490ef7640ed949a51bef7ec65fd2ef Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Tue, 17 Jan 2023 13:53:48 +0100
Subject: [PATCH 38/59] adding calibration methods from abstention package

---
 quapy/CHANGE_LOG.txt        | 3 +++
 quapy/method/aggregative.py | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index 06d7dc4..20e0759 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -34,7 +34,10 @@
 - newer versions of numpy raise a warning when accessing types (e.g., np.float). I have replaced all such instances
   with the plain python type (e.g., float).

+- new dependency "abstention" (to add to the project requirements and setup)
+
 Things to fix:
+- calibration with recalibration methods has to be fixed for exact_train_prev in EMQ (conflicts with clone, deepcopy, etc.)
 - clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance():
   this is not working; I don't know how to make the isinstance work.
   Looks like there is some problem with the path of the imported class wrt the path of the class that arrives from another module...
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 57c821d..9e5338d 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -10,6 +10,7 @@ from sklearn.model_selection import StratifiedKFold, cross_val_predict from tqdm import tqdm import quapy as qp import quapy.functional as F +from classification.calibration import RecalibratedClassifier from quapy.classification.svmperf import SVMperf from quapy.data import LabelledCollection from quapy.method.base import BaseQuantifier, BinaryQuantifier @@ -137,6 +138,7 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): else: key_prefix = 'base_estimator__' parameters = {key_prefix + k: v for k, v in parameters.items()} + self.learner.set_params(**parameters) From 50d886bffe7f3def0ce49388b299b915a4f40b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Wed, 18 Jan 2023 13:06:38 +0100 Subject: [PATCH 39/59] testing log scale --- quapy/plot.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/quapy/plot.py b/quapy/plot.py index c1a857e..7b2145f 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -4,6 +4,7 @@ from matplotlib.cm import get_cmap import numpy as np from matplotlib import cm from scipy.stats import ttest_ind_from_stats +from matplotlib.ticker import StrMethodFormatter, NullFormatter import quapy as qp @@ -256,6 +257,9 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, # x_error function) and 'y' is the estim-test shift (computed as according to y_error) data = _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error, y_error, method_order) + if method_order is None: + method_order = method_names + _set_colors(ax, n_methods=len(method_order)) bins = np.linspace(0, 1, n_bins+1) @@ -266,7 +270,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, tr_test_drifts = data[method]['x'] method_drifts = data[method]['y'] if logscale: - method_drifts=np.log(1+method_drifts) + #method_drifts=np.log(1+method_drifts) + plt.yscale("log") + ax.yaxis.set_major_formatter(StrMethodFormatter('{x:.2f}')) + ax.yaxis.set_minor_formatter(StrMethodFormatter('{x:.2f}')) + inds = np.digitize(tr_test_drifts, bins, right=True) @@ -299,7 +307,7 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, if show_density: ax.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))], max_y*npoints/np.max(npoints), alpha=0.15, color='g', width=binwidth, label='density') - + ax.set(xlabel=f'Distribution shift between training set and test sample', ylabel=f'{error_name.upper()} (true distribution, predicted distribution)', title=title) From f10a3139d9594b38a95234ac8513c5473baaded4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?= Date: Wed, 18 Jan 2023 14:53:46 +0100 Subject: [PATCH 40/59] changes to plots again --- quapy/plot.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/quapy/plot.py b/quapy/plot.py index 7b2145f..2e41413 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -4,7 +4,7 @@ from matplotlib.cm import get_cmap import numpy as np from matplotlib import cm from scipy.stats import ttest_ind_from_stats -from matplotlib.ticker import StrMethodFormatter, NullFormatter +from matplotlib.ticker import ScalarFormatter import quapy as qp @@ -270,11 +270,11 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, tr_test_drifts = data[method]['x'] method_drifts = data[method]['y'] if logscale: - 
#method_drifts=np.log(1+method_drifts)
-            plt.yscale("log")
-            ax.yaxis.set_major_formatter(StrMethodFormatter('{x:.2f}'))
-            ax.yaxis.set_minor_formatter(StrMethodFormatter('{x:.2f}'))
-
+            ax.set_yscale("log")
+            ax.yaxis.set_major_formatter(ScalarFormatter())
+            ax.yaxis.set_minor_formatter(ScalarFormatter())
+            ax.yaxis.get_major_formatter().set_scientific(False)
+            ax.yaxis.get_minor_formatter().set_scientific(False)

         inds = np.digitize(tr_test_drifts, bins, right=True)

@@ -305,8 +305,14 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
             ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)

     if show_density:
-        ax.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
+        ax2 = ax.twinx()
+        ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
                max_y*npoints/np.max(npoints), alpha=0.15, color='g', width=binwidth, label='density')
+        #ax2.set_ylabel("bar data")
+        ax2.set_ylim(0,1)
+        ax2.spines['right'].set_color('g')
+        ax2.tick_params(axis='y', colors='g')
+        #ax2.yaxis.set_visible(False)

     ax.set(xlabel=f'Distribution shift between training set and test sample',
            ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
@@ -321,8 +327,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     ax.set_xlim(min_x, max_x)

     if show_legend:
-        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
-
+        fig.legend(loc='right')
+
     _save_or_show(savepath)

From 7ed7c9b2e94a9da4d0d763c389bde9b612354b75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Wed, 18 Jan 2023 16:05:40 +0100
Subject: [PATCH 41/59] changing the logarithmic scale

---
 quapy/plot.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index 2e41413..15c7be5 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -5,6 +5,7 @@ import numpy as np
 from matplotlib import cm
 from scipy.stats import ttest_ind_from_stats
 from matplotlib.ticker import ScalarFormatter
+import math

 import quapy as qp

@@ -272,9 +273,8 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
         if logscale:
             ax.set_yscale("log")
             ax.yaxis.set_major_formatter(ScalarFormatter())
-            ax.yaxis.set_minor_formatter(ScalarFormatter())
             ax.yaxis.get_major_formatter().set_scientific(False)
-            ax.yaxis.get_minor_formatter().set_scientific(False)
+            ax.minorticks_off()

         inds = np.digitize(tr_test_drifts, bins, right=True)

@@ -307,12 +307,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
     if show_density:
         ax2 = ax.twinx()
         ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
-               max_y*npoints/np.max(npoints), alpha=0.15, color='g', width=binwidth, label='density')
-        #ax2.set_ylabel("bar data")
+               npoints/np.sum(npoints), alpha=0.15, color='g', width=binwidth, label='density')
         ax2.set_ylim(0,1)
         ax2.spines['right'].set_color('g')
         ax2.tick_params(axis='y', colors='g')
-        #ax2.yaxis.set_visible(False)

     ax.set(xlabel=f'Distribution shift between training set and test sample',
            ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
@@ -325,9 +323,13 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,

     ax.set_xlim(min_x, max_x)

+    if logscale:
+        #nice scale for the logarithmic axis
+        ax.set_ylim(0,10 ** math.ceil(math.log10(max_y)))
+

     if show_legend:
-        fig.legend(loc='right')
+        fig.legend(bbox_to_anchor=(1.05, 1), loc="upper right")

     _save_or_show(savepath)

@@ -549,7 +551,7 @@ def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error
     method_order = []

     for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs):
-        tr_prev_i = np.repeat(tr_prev_i.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0)
+        tr_prev_i = np.repeat(tr_prevs.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0)

         tr_test_drifts = x_error(test_prevs_i, tr_prev_i)
         data[method]['x'] = np.concatenate([data[method]['x'], tr_test_drifts])

From 8da4b4c5f399c764bf96bda36004ebb57f05c548 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Wed, 18 Jan 2023 16:12:38 +0100
Subject: [PATCH 42/59] placing the legend

---
 quapy/plot.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index 15c7be5..358bf45 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -306,9 +306,10 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,

     if show_density:
         ax2 = ax.twinx()
+        densities = npoints/np.sum(npoints)
         ax2.bar([ind * binwidth-binwidth/2 for ind in range(len(bins))],
-               npoints/np.sum(npoints), alpha=0.15, color='g', width=binwidth, label='density')
-        ax2.set_ylim(0,1)
+               densities, alpha=0.15, color='g', width=binwidth, label='density')
+        ax2.set_ylim(0,max(densities))
         ax2.spines['right'].set_color('g')
         ax2.tick_params(axis='y', colors='g')

@@ -329,7 +330,9 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,


     if show_legend:
-        fig.legend(bbox_to_anchor=(1.05, 1), loc="upper right")
+        fig.legend(loc='lower center',
+                   bbox_to_anchor=(1, 0.5),
+                   ncol=(len(method_names)+1)//2)

     _save_or_show(savepath)

From 38aa42e4c52ef6434ce02918c8263b76372ffedb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Gonz=C3=A1lez?=
Date: Wed, 18 Jan 2023 16:44:56 +0100
Subject: [PATCH 43/59] fixing a bug

---
 quapy/plot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/quapy/plot.py b/quapy/plot.py
index 358bf45..061ecdc 100644
--- a/quapy/plot.py
+++ b/quapy/plot.py
@@ -554,7 +554,7 @@ def _join_data_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, x_error
     method_order = []

     for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs):
-        tr_prev_i = np.repeat(tr_prevs.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0)
+        tr_prev_i = np.repeat(tr_prev_i.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0)

         tr_test_drifts = x_error(test_prevs_i, tr_prev_i)
         data[method]['x'] = np.concatenate([data[method]['x'], tr_test_drifts])

From 09abcfc935c7a0efe180e8c5bc3e468e29228d56 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Wed, 18 Jan 2023 19:46:19 +0100
Subject: [PATCH 44/59] adding calibration methods from the abstention package to quapy

---
 quapy/CHANGE_LOG.txt                |   3 +-
 quapy/classification/calibration.py | 166 ++++++++++++++++++++++++++++
 quapy/method/aggregative.py         |  26 ++++-
 quapy/plot.py                       |   1 -
 4 files changed, 191 insertions(+), 5 deletions(-)
 create mode 100644 quapy/classification/calibration.py

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index 20e0759..090afc8 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -34,7 +34,8 @@
 - newer versions of numpy raise a warning when accessing types (e.g., np.float). I have replaced all such instances
   with the plain python type (e.g., float).

-- new dependency "abstention" (to add to the project requirements and setup)
+- new dependency "abstention" (to add to the project requirements and setup). Calibration methods from
+  https://github.com/kundajelab/abstention added.
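[Editorial aside, not part of the patch: the wrappers this patch creates in quapy/classification/calibration.py (shown below) can be applied to any scikit-learn probabilistic classifier directly; a hedged sketch, in which X_train, y_train and X_test are hypothetical arrays and the import path assumes quapy is installed as a package.]

    from sklearn.linear_model import LogisticRegression
    from quapy.classification.calibration import BCTSCalibration

    # the posteriors used to fit the calibration map come from k-fold CV (val_split=5);
    # the classifier is then refit on the whole training set
    calibrated = BCTSCalibration(LogisticRegression(), val_split=5)
    calibrated.fit(X_train, y_train)
    calibrated_posteriors = calibrated.predict_proba(X_test)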
Things to fix:
 - calibration with recalibration methods has to be fixed for exact_train_prev in EMQ (conflicts with clone, deepcopy, etc.)
diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py
new file mode 100644
index 0000000..9ea5576
--- /dev/null
+++ b/quapy/classification/calibration.py
@@ -0,0 +1,166 @@
+from copy import deepcopy
+
+from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling
+from sklearn.base import BaseEstimator, clone
+from sklearn.model_selection import cross_val_predict, train_test_split
+import numpy as np
+
+
+# Wrappers of the calibration methods defined by Alexandari et al. in the paper
+# requires "pip install abstention"
+# see https://github.com/kundajelab/abstention
+
+
+class RecalibratedClassifier:
+    pass
+
+
+class RecalibratedClassifierBase(BaseEstimator, RecalibratedClassifier):
+    """
+    Applies a (re)calibration method from abstention.calibration, as defined in
+    `Alexandari et al. paper `_:
+
+    :param estimator: a scikit-learn probabilistic classifier
+    :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory)
+    :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p
+        in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the
+        training instances (the rest is used for training). In any case, the classifier is retrained in the whole
+        training set afterwards.
+    :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)
+    :param verbose: whether or not to display information in the standard output
+    """
+
+    def __init__(self, estimator, calibrator, val_split=5, n_jobs=1, verbose=False):
+        self.estimator = estimator
+        self.calibrator = calibrator
+        self.val_split = val_split
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+
+    def fit(self, X, y):
+        k = self.val_split
+        if isinstance(k, int):
+            if k < 2:
+                raise ValueError('wrong value for val_split: the number of folds must be >= 2')
+            return self.fit_cv(X, y)
+        elif isinstance(k, float):
+            if not (0 < k < 1):
+                raise ValueError('wrong value for val_split: the proportion of validation documents must be in (0,1)')
+            return self.fit_tr_val(X, y)
+
+    def fit_cv(self, X, y):
+        posteriors = cross_val_predict(
+            self.estimator, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method="predict_proba"
+        )
+        self.estimator.fit(X, y)
+        nclasses = len(np.unique(y))
+        self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[y], posterior_supplied=True)
+        return self
+
+    def fit_tr_val(self, X, y):
+        Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=self.val_split, stratify=y)
+        self.estimator.fit(Xtr, ytr)
+        posteriors = self.estimator.predict_proba(Xva)
+        nclasses = len(np.unique(yva))
+        self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[yva], posterior_supplied=True)
+        return self
+
+    def predict(self, X):
+        return self.estimator.predict(X)
+
+    def predict_proba(self, X):
+        posteriors = self.estimator.predict_proba(X)
+        return self.calibration_function(posteriors)
+
+    @property
+    def classes_(self):
+        return self.estimator.classes_
+
+
+class NBVSCalibration(RecalibratedClassifierBase):
+    """
+    Applies the No-Bias Vector Scaling (NBVS) calibration method from abstention.calibration, as defined in
+    `Alexandari et al.
paper `_: + + :param estimator: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p + in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the + training instances (the rest is used for training). In any case, the classifier is retrained in the whole + training set afterwards. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): + self.estimator = estimator + self.calibrator = NoBiasVectorScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose + + +class BCTSCalibration(RecalibratedClassifierBase): + """ + Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in + `Alexandari et al. paper `_: + + :param estimator: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p + in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the + training instances (the rest is used for training). In any case, the classifier is retrained in the whole + training set afterwards. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): + self.estimator = estimator + self.calibrator = TempScaling(verbose=verbose, bias_positions='all') + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose + + +class TSCalibration(RecalibratedClassifierBase): + """ + Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in + `Alexandari et al. paper `_: + + :param estimator: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p + in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the + training instances (the rest is used for training). In any case, the classifier is retrained in the whole + training set afterwards. + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): + self.estimator = estimator + self.calibrator = TempScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose + + +class VSCalibration(RecalibratedClassifierBase): + """ + Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in + `Alexandari et al. paper `_: + + :param estimator: a scikit-learn probabilistic classifier + :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p + in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the + training instances (the rest is used for training). In any case, the classifier is retrained in the whole + training set afterwards. 
+ :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param verbose: whether or not to display information in the standard output + """ + + def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): + self.estimator = estimator + self.calibrator = VectorScaling(verbose=verbose) + self.val_split = val_split + self.n_jobs = n_jobs + self.verbose = verbose + diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 9e5338d..d77f1ed 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -10,7 +10,8 @@ from sklearn.model_selection import StratifiedKFold, cross_val_predict from tqdm import tqdm import quapy as qp import quapy.functional as F -from classification.calibration import RecalibratedClassifier +from classification.calibration import RecalibratedClassifier, NBVSCalibration, BCTSCalibration, TSCalibration, \ + VSCalibration from quapy.classification.svmperf import SVMperf from quapy.data import LabelledCollection from quapy.method.base import BaseQuantifier, BinaryQuantifier @@ -138,8 +139,11 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): else: key_prefix = 'base_estimator__' parameters = {key_prefix + k: v for k, v in parameters.items()} + elif isinstance(self.learner, RecalibratedClassifier): + parameters = {'estimator__' + k: v for k, v in parameters.items()} self.learner.set_params(**parameters) + return self # Helper @@ -511,22 +515,38 @@ class EMQ(AggregativeProbabilisticQuantifier): or set to False for computing the training prevalence as an estimate, akin to PCC, i.e., as the expected value of the posterior probabilities of the training instances as suggested in `Alexandari et al. paper `_: + :param recalib: a string indicating the method of recalibration. Available choices include "nbvs" (No-Bias Vector + Scaling), "bcts" (Bias-Corrected Temperature Scaling), "ts" (Temperature Scaling), and "vs" (Vector Scaling). + The default value is None, indicating no recalibration. 
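[Editorial aside, not part of the patch: a hedged sketch of how the new recalib option is meant to be used; `training` is a hypothetical LabelledCollection and `test_instances` a hypothetical array.]

    from sklearn.linear_model import LogisticRegression
    from quapy.method.aggregative import EMQ

    # 'bcts' recalibration together with exact_train_prev=False mirrors the
    # configuration studied by Alexandari et al.
    quantifier = EMQ(LogisticRegression(), exact_train_prev=False, recalib='bcts')
    quantifier.fit(training)
    estim_prevalences = quantifier.quantify(test_instances)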
""" MAX_ITER = 1000 EPSILON = 1e-4 - def __init__(self, learner: BaseEstimator, exact_train_prev=True): + def __init__(self, learner: BaseEstimator, exact_train_prev=True, recalib=None): self.learner = learner self.exact_train_prev = exact_train_prev + self.recalib = recalib def fit(self, data: LabelledCollection, fit_learner=True): + if self.recalib is not None: + if self.recalib == 'nbvs': + self.learner = NBVSCalibration(self.learner) + elif self.recalib == 'bcts': + self.learner = BCTSCalibration(self.learner) + elif self.recalib == 'ts': + self.learner = TSCalibration(self.learner) + elif self.recalib == 'vs': + self.learner = VSCalibration(self.learner) + else: + raise ValueError('invalid param argument for recalibration method; available ones are ' + '"nbvs", "bcts", "ts", and "vs".') self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True) if self.exact_train_prev: self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_) else: self.train_prevalence = qp.model_selection.cross_val_predict( - quantifier=PCC(clone(self.learner)), + quantifier=PCC(deepcopy(self.learner)), data=data, nfolds=3, random_state=0 diff --git a/quapy/plot.py b/quapy/plot.py index 7d94012..b63eba6 100644 --- a/quapy/plot.py +++ b/quapy/plot.py @@ -323,7 +323,6 @@ def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, for vline in vlines: ax.axvline(vline, 0, 1, linestyle='--', color='k') - ax.set_xlim(min_x, max_x) if show_legend: From adf799c8eca88952c4bdc4ca6a5cfbb4e05c8010 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 24 Jan 2023 09:48:21 +0100 Subject: [PATCH 45/59] recalibration --- examples/lequa2022_experiments_recalib.py | 59 +++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 examples/lequa2022_experiments_recalib.py diff --git a/examples/lequa2022_experiments_recalib.py b/examples/lequa2022_experiments_recalib.py new file mode 100644 index 0000000..983c781 --- /dev/null +++ b/examples/lequa2022_experiments_recalib.py @@ -0,0 +1,59 @@ +import numpy as np +from abstention.calibration import NoBiasVectorScaling, VectorScaling, TempScaling +from sklearn.calibration import CalibratedClassifierCV +from sklearn.linear_model import LogisticRegression +import quapy as qp +import quapy.functional as F +from classification.calibration import RecalibratedClassifierBase, NBVSCalibration, \ + BCTSCalibration +from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022 +from evaluation import evaluation_report +from method.aggregative import EMQ +from model_selection import GridSearchQ +import pandas as pd + +for task in ['T1A', 'T1B']: + for calib in ['NoCal', 'TS', 'VS', 'NBVS', 'NBTS']: + + # calibration = TempScaling(verbose=False, bias_positions='all') + + qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task] + training, val_generator, test_generator = fetch_lequa2022(task=task) + + # define the quantifier + # learner = BCTSCalibration(LogisticRegression(), n_jobs=-1) + # learner = CalibratedClassifierCV(LogisticRegression()) + learner = LogisticRegression() + quantifier = EMQ(learner=learner, exact_train_prev=False, recalib=calib.lower() if calib != 'NoCal' else None) + + # model selection + param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]} + model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', n_jobs=-1, refit=False, verbose=True) + quantifier = model_selection.fit(training) + + # evaluation + report = evaluation_report(quantifier, 
+
+        import os
+        os.makedirs(f'./predictions/{task}', exist_ok=True)
+        with open(f'./predictions/{task}/{calib}-EMQ.csv', 'wt') as foo:
+            estim_prev = report['estim-prev'].values
+            nclasses = len(estim_prev[0])
+            foo.write(f'id,'+','.join([str(x) for x in range(nclasses)])+'\n')
+            for id, prev in enumerate(estim_prev):
+                foo.write(f'{id},'+','.join([f'{p:.5f}' for p in prev])+'\n')
+
+        os.makedirs(f'./errors/{task}', exist_ok=True)
+        with open(f'./errors/{task}/{calib}-EMQ.csv', 'wt') as foo:
+            maes, mraes = report['mae'].values, report['mrae'].values
+            foo.write(f'id,AE,RAE\n')
+            for id, (ae_i, rae_i) in enumerate(zip(maes, mraes)):
+                foo.write(f'{id},{ae_i:.5f},{rae_i:.5f}\n')
+
+        # printing results
+        pd.set_option('display.expand_frame_repr', False)
+        report['estim-prev'] = report['estim-prev'].map(F.strprev)
+        print(report)
+
+        print('Averaged values:')
+        print(report.mean())

From f9a199d85976ecc7be24ce00341c72b82ff4cb65 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Fri, 27 Jan 2023 18:13:23 +0100
Subject: [PATCH 46/59] fixing hyperparameters with prefixes, and replacing learner with classifier in aggregative quantifiers

---
 TODO.txt                                  |   6 +
 examples/lequa2022_experiments.py         |   2 +-
 examples/lequa2022_experiments_recalib.py |  42 +--
 quapy/CHANGE_LOG.txt                      |   6 +
 quapy/classification/calibration.py       |  60 ++--
 quapy/method/aggregative.py               | 365 +++++++++++-----------
 quapy/method/base.py                      |  51 +--
 quapy/method/meta.py                      |  62 ++--
 quapy/method/neural.py                    |  60 ++--
 quapy/model_selection.py                  |  16 +-
 10 files changed, 352 insertions(+), 318 deletions(-)

diff --git a/TODO.txt b/TODO.txt
index 6cef78c..36b7e95 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -6,6 +6,12 @@ merge with master, because I had to fix some problems with QuaNet due to an issu
 added cross_val_predict in qp.model_selection (i.e., a cross_val_predict for quantification) --would be nice to have
 it parallelized

+check the OneVsAll module(s)
+
+check the set_params of neural.py, because the separation of estimator__ is not implemented; see also
+    __check_params_colision
+
+HDy can be customized so that the number of bins is specified, instead of explored within the fit method

 Packaging:
 ==========================================
diff --git a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py
index 31ec651..41bc495 100644
--- a/examples/lequa2022_experiments.py
+++ b/examples/lequa2022_experiments.py
@@ -17,7 +17,7 @@ training, val_generator, test_generator = fetch_lequa2022(task=task)

 # define the quantifier
 learner = CalibratedClassifierCV(LogisticRegression())
-quantifier = EMQ(learner=learner)
+quantifier = EMQ(classifier=learner)

 # model selection
 param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}
diff --git a/examples/lequa2022_experiments_recalib.py b/examples/lequa2022_experiments_recalib.py
index 983c781..a5a0e05 100644
--- a/examples/lequa2022_experiments_recalib.py
+++ b/examples/lequa2022_experiments_recalib.py
@@ -4,7 +4,7 @@ from sklearn.calibration import CalibratedClassifierCV
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
 import quapy.functional as F
-from classification.calibration import RecalibratedClassifierBase, NBVSCalibration, \
+from classification.calibration import RecalibratedProbabilisticClassifierBase, NBVSCalibration, \
     BCTSCalibration
 from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
 from evaluation import evaluation_report
 from method.aggregative import EMQ
 from model_selection import GridSearchQ
 import pandas as pd

 for task in ['T1A', 'T1B']:
-
     for calib in ['NoCal', 'TS', 'VS', 'NBVS', 'BCTS']:

         # calibration = TempScaling(verbose=False, bias_positions='all')
@@ -24,31 +23,36 @@ for task in ['T1A', 'T1B']:
         # learner = BCTSCalibration(LogisticRegression(), n_jobs=-1)
         # learner = CalibratedClassifierCV(LogisticRegression())
         learner = LogisticRegression()
-        quantifier = EMQ(learner=learner, exact_train_prev=False, recalib=calib.lower() if calib != 'NoCal' else None)
+        quantifier = EMQ(classifier=learner)

         # model selection
-        param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}
+        param_grid = {
+            'classifier__C': np.logspace(-3, 3, 7),
+            'classifier__class_weight': ['balanced', None],
+            'recalib': ['platt', 'ts', 'vs', 'nbvs', 'bcts', None],
+            'exact_train_prev': [False, True]
+        }
         model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', n_jobs=-1, refit=False, verbose=True)
         quantifier = model_selection.fit(training)

         # evaluation
         report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
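[Editorial aside, not part of the patch: the param_grid above illustrates the convention this patch introduces; classifier hyperparameters carry the "classifier__" prefix, as in scikit-learn pipelines, while quantifier hyperparameters (recalib, exact_train_prev) are named plainly. A hedged sketch of the equivalent direct configuration, reusing the imports of the script above and assuming quantifiers inherit scikit-learn's set_params semantics.]

    # equivalent to selecting one point of the grid by hand
    quantifier = EMQ(classifier=LogisticRegression())
    quantifier.set_params(classifier__C=10, recalib='bcts', exact_train_prev=False)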
- clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance(): diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index 9ea5576..69a7e14 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -11,27 +11,27 @@ import numpy as np # see https://github.com/kundajelab/abstention -class RecalibratedClassifier: +class RecalibratedProbabilisticClassifier: pass -class RecalibratedClassifierBase(BaseEstimator, RecalibratedClassifier): +class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabilisticClassifier): """ Applies a (re)calibration method from abstention.calibration, as defined in `Alexandari et al. paper `_: - :param estimator: a scikit-learn probabilistic classifier + :param classifier: a scikit-learn probabilistic classifier :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory) - :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p + :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole training set afterwards. - :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) + :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None :param verbose: whether or not to display information in the standard output """ - def __init__(self, estimator, calibrator, val_split=5, n_jobs=1, verbose=False): - self.estimator = estimator + def __init__(self, classifier, calibrator, val_split=5, n_jobs=None, verbose=False): + self.classifier = classifier self.calibrator = calibrator self.val_split = val_split self.n_jobs = n_jobs @@ -50,39 +50,39 @@ class RecalibratedClassifierBase(BaseEstimator, RecalibratedClassifier): def fit_cv(self, X, y): posteriors = cross_val_predict( - self.estimator, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method="predict_proba" + self.classifier, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method='predict_proba' ) - self.estimator.fit(X, y) + self.classifier.fit(X, y) nclasses = len(np.unique(y)) self.calibration_function = self.calibrator(posteriors, np.eye(nclasses)[y], posterior_supplied=True) return self def fit_tr_val(self, X, y): Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=self.val_split, stratify=y) - self.estimator.fit(Xtr, ytr) - posteriors = self.estimator.predict_proba(Xva) + self.classifier.fit(Xtr, ytr) + posteriors = self.classifier.predict_proba(Xva) nclasses = len(np.unique(yva)) self.calibrator = self.calibrator(posteriors, np.eye(nclasses)[yva], posterior_supplied=True) return self def predict(self, X): - return self.estimator.predict(X) + return self.classifier.predict(X) def predict_proba(self, X): - posteriors = self.estimator.predict_proba(X) + posteriors = self.classifier.predict_proba(X) return self.calibration_function(posteriors) @property def classes_(self): - return self.estimator.classes_ + return self.classifier.classes_ -class NBVSCalibration(RecalibratedClassifierBase): +class NBVSCalibration(RecalibratedProbabilisticClassifierBase): """ Applies the No-Bias Vector Scaling (NBVS) calibration method from 
abstention.calibration, as defined in `Alexandari et al. paper `_: - :param estimator: a scikit-learn probabilistic classifier + :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole @@ -91,20 +91,20 @@ class NBVSCalibration(RecalibratedClassifierBase): :param verbose: whether or not to display information in the standard output """ - def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): - self.estimator = estimator + def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + self.classifier = classifier self.calibrator = NoBiasVectorScaling(verbose=verbose) self.val_split = val_split self.n_jobs = n_jobs self.verbose = verbose -class BCTSCalibration(RecalibratedClassifierBase): +class BCTSCalibration(RecalibratedProbabilisticClassifierBase): """ Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in `Alexandari et al. paper `_: - :param estimator: a scikit-learn probabilistic classifier + :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole @@ -113,20 +113,20 @@ class BCTSCalibration(RecalibratedClassifierBase): :param verbose: whether or not to display information in the standard output """ - def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): - self.estimator = estimator + def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + self.classifier = classifier self.calibrator = TempScaling(verbose=verbose, bias_positions='all') self.val_split = val_split self.n_jobs = n_jobs self.verbose = verbose -class TSCalibration(RecalibratedClassifierBase): +class TSCalibration(RecalibratedProbabilisticClassifierBase): """ Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in `Alexandari et al. paper `_: - :param estimator: a scikit-learn probabilistic classifier + :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). 
In any case, the classifier is retrained in the whole @@ -135,20 +135,20 @@ class TSCalibration(RecalibratedClassifierBase): :param verbose: whether or not to display information in the standard output """ - def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): - self.estimator = estimator + def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + self.classifier = classifier self.calibrator = TempScaling(verbose=verbose) self.val_split = val_split self.n_jobs = n_jobs self.verbose = verbose -class VSCalibration(RecalibratedClassifierBase): +class VSCalibration(RecalibratedProbabilisticClassifierBase): """ Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in `Alexandari et al. paper `_: - :param estimator: a scikit-learn probabilistic classifier + :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole @@ -157,8 +157,8 @@ class VSCalibration(RecalibratedClassifierBase): :param verbose: whether or not to display information in the standard output """ - def __init__(self, estimator, val_split=5, n_jobs=1, verbose=False): - self.estimator = estimator + def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + self.classifier = classifier self.calibrator = VectorScaling(verbose=verbose) self.val_split = val_split self.n_jobs = n_jobs diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index d77f1ed..3246b9f 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -10,7 +10,7 @@ from sklearn.model_selection import StratifiedKFold, cross_val_predict from tqdm import tqdm import quapy as qp import quapy.functional as F -from classification.calibration import RecalibratedClassifier, NBVSCalibration, BCTSCalibration, TSCalibration, \ +from classification.calibration import RecalibratedProbabilisticClassifier, NBVSCalibration, BCTSCalibration, TSCalibration, \ VSCalibration from quapy.classification.svmperf import SVMperf from quapy.data import LabelledCollection @@ -23,41 +23,41 @@ from quapy.method.base import BaseQuantifier, BinaryQuantifier class AggregativeQuantifier(BaseQuantifier): """ Abstract class for quantification methods that base their estimations on the aggregation of classification - results. Aggregative Quantifiers thus implement a :meth:`classify` method and maintain a :attr:`learner` attribute. - Subclasses of this abstract class must implement the method :meth:`aggregate` which computes the aggregation - of label predictions. The method :meth:`quantify` comes with a default implementation based on - :meth:`classify` and :meth:`aggregate`. + results. Aggregative Quantifiers thus implement a :meth:`classify` method and maintain a :attr:`classifier` + attribute. Subclasses of this abstract class must implement the method :meth:`aggregate` which computes the + aggregation of label predictions. The method :meth:`quantify` comes with a default implementation based on + :meth:`classify` and :meth:`aggregate`. 
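[Editorial aside, not part of the patch: the default composition described here boils down to the following sketch, which paraphrases the behavior rather than quoting this diff.]

    def quantify(self, instances):
        # delegate to the subclass: crisp labels or posteriors, depending on the method
        classif_predictions = self.classify(instances)
        # turn the individual predictions into a class prevalence vector
        return self.aggregate(classif_predictions)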
""" @abstractmethod - def fit(self, data: LabelledCollection, fit_learner=True): + def fit(self, data: LabelledCollection, fit_classifier=True): """ Trains the aggregative quantifier :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data - :param fit_learner: whether or not to train the learner (default is True). Set to False if the + :param fit_classifier: whether or not to train the learner (default is True). Set to False if the learner has been trained outside the quantifier. :return: self """ ... @property - def learner(self): + def classifier(self): """ Gives access to the classifier :return: the classifier (typically an sklearn's Estimator) """ - return self.learner_ + return self.classifier_ - @learner.setter - def learner(self, classifier): + @classifier.setter + def classifier(self, classifier): """ Setter for the classifier :param classifier: the classifier """ - self.learner_ = classifier + self.classifier_ = classifier def classify(self, instances): """ @@ -68,7 +68,7 @@ class AggregativeQuantifier(BaseQuantifier): :param instances: array-like :return: np.ndarray of shape `(n_instances,)` with label predictions """ - return self.learner.predict(instances) + return self.classifier.predict(instances) def quantify(self, instances): """ @@ -91,24 +91,24 @@ class AggregativeQuantifier(BaseQuantifier): """ ... - def get_params(self, deep=True): - """ - Return the current parameters of the quantifier. + # def get_params(self, deep=True): + # """ + # Return the current parameters of the quantifier. + # + # :param deep: for compatibility with sklearn + # :return: a dictionary of param-value pairs + # """ + # + # return self.learner.get_params() - :param deep: for compatibility with sklearn - :return: a dictionary of param-value pairs - """ - - return self.learner.get_params() - - def set_params(self, **parameters): - """ - Set the parameters of the quantifier. - - :param parameters: dictionary of param-value pairs - """ - - self.learner.set_params(**parameters) + # def set_params(self, **parameters): + # """ + # Set the parameters of the quantifier. 
+ # + # :param parameters: dictionary of param-value pairs + # """ + # + # self.learner.set_params(**parameters) @property def classes_(self): @@ -118,7 +118,7 @@ class AggregativeQuantifier(BaseQuantifier): :return: array-like """ - return self.learner.classes_ + return self.classifier.classes_ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): @@ -130,43 +130,43 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): """ def classify(self, instances): - return self.learner.predict_proba(instances) + return self.classifier.predict_proba(instances) - def set_params(self, **parameters): - if isinstance(self.learner, CalibratedClassifierCV): - if self.learner.get_params().get('base_estimator') == 'deprecated': - key_prefix = 'estimator__' # this has changed in the newer versions of sklearn - else: - key_prefix = 'base_estimator__' - parameters = {key_prefix + k: v for k, v in parameters.items()} - elif isinstance(self.learner, RecalibratedClassifier): - parameters = {'estimator__' + k: v for k, v in parameters.items()} - - self.learner.set_params(**parameters) - return self + # def set_params(self, **parameters): + # if isinstance(self.classifier, CalibratedClassifierCV): + # if self.classifier.get_params().get('base_estimator') == 'deprecated': + # key_prefix = 'estimator__' # this has changed in the newer versions of sklearn + # else: + # key_prefix = 'base_estimator__' + # parameters = {key_prefix + k: v for k, v in parameters.items()} + # elif isinstance(self.classifier, RecalibratedClassifier): + # parameters = {'estimator__' + k: v for k, v in parameters.items()} + # + # self.classifier.set_params(**parameters) + # return self # Helper # ------------------------------------ -def _ensure_probabilistic(learner): - if not hasattr(learner, 'predict_proba'): - print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. ' +def _ensure_probabilistic(classifier): + if not hasattr(classifier, 'predict_proba'): + print(f'The learner {classifier.__class__.__name__} does not seem to be probabilistic. ' f'The learner will be calibrated.') - learner = CalibratedClassifierCV(learner, cv=5) - return learner + classifier = CalibratedClassifierCV(classifier, cv=5) + return classifier -def _training_helper(learner, +def _training_helper(classifier, data: LabelledCollection, - fit_learner: bool = True, + fit_classifier: bool = True, ensure_probabilistic=False, val_split: Union[LabelledCollection, float] = None): """ Training procedure common to all Aggregative Quantifiers. - :param learner: the learner to be fit + :param classifier: the learner to be fit :param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner. 
- :param fit_learner: whether or not to fit the learner (if False, then bypasses any action) + :param fit_classifier: whether or not to fit the learner (if False, then bypasses any action) :param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the learner is not probabilistic, then a CalibratedCV instance of it is trained) :param val_split: if specified as a float, indicates the proportion of training instances that will define the @@ -175,9 +175,9 @@ def _training_helper(learner, :return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split>0 or None otherwise) to be used as a validation set for any subsequent parameter fitting """ - if fit_learner: + if fit_classifier: if ensure_probabilistic: - learner = _ensure_probabilistic(learner) + classifier = _ensure_probabilistic(classifier) if val_split is not None: if isinstance(val_split, float): if not (0 < val_split < 1): @@ -193,72 +193,72 @@ def _training_helper(learner, else: train, unused = data, None - if isinstance(learner, BaseQuantifier): - learner.fit(train) + if isinstance(classifier, BaseQuantifier): + classifier.fit(train) else: - learner.fit(*train.Xy) + classifier.fit(*train.Xy) else: if ensure_probabilistic: - if not hasattr(learner, 'predict_proba'): - raise AssertionError('error: the learner cannot be calibrated since fit_learner is set to False') + if not hasattr(classifier, 'predict_proba'): + raise AssertionError('error: the learner cannot be calibrated since fit_classifier is set to False') unused = None if isinstance(val_split, LabelledCollection): unused = val_split - return learner, unused + return classifier, unused def cross_generate_predictions( data, - learner, + classifier, val_split, probabilistic, - fit_learner, + fit_classifier, n_jobs ): n_jobs = qp.get_njobs(n_jobs) if isinstance(val_split, int): - assert fit_learner == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' + assert fit_classifier == True, \ + 'the parameters for the adjustment cannot be estimated with kFCV with fit_classifier=False' if probabilistic: - learner = _ensure_probabilistic(learner) + classifier = _ensure_probabilistic(classifier) predict = 'predict_proba' else: predict = 'predict' - y_pred = cross_val_predict(learner, *data.Xy, cv=val_split, n_jobs=n_jobs, method=predict) + y_pred = cross_val_predict(classifier, *data.Xy, cv=val_split, n_jobs=n_jobs, method=predict) class_count = data.counts() # fit the learner on all data - learner.fit(*data.Xy) + classifier.fit(*data.Xy) y = data.y classes = data.classes_ else: - learner, val_data = _training_helper( - learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=val_split + classifier, val_data = _training_helper( + classifier, data, fit_classifier, ensure_probabilistic=probabilistic, val_split=val_split ) - y_pred = learner.predict_proba(val_data.instances) if probabilistic else learner.predict(val_data.instances) + y_pred = classifier.predict_proba(val_data.instances) if probabilistic else classifier.predict(val_data.instances) y = val_data.labels classes = val_data.classes_ class_count = val_data.counts() - return learner, y, y_pred, classes, class_count + return classifier, y, y_pred, classes, class_count def cross_generate_predictions_depr( data, - learner, + classifier, val_split, probabilistic, - fit_learner, + fit_classifier, method_name='' ): - predict = learner.predict_proba if probabilistic else 
learner.predict + predict = classifier.predict_proba if probabilistic else classifier.predict if isinstance(val_split, int): - assert fit_learner == True, \ - 'the parameters for the adjustment cannot be estimated with kFCV with fit_learner=False' + assert fit_classifier == True, \ + 'the parameters for the adjustment cannot be estimated with kFCV with fit_classifier=False' # kFCV estimation of parameters y, y_ = [], [] kfcv = StratifiedKFold(n_splits=val_split) @@ -267,8 +267,8 @@ def cross_generate_predictions_depr( pbar.set_description(f'{method_name}\tfitting fold {k}') training = data.sampling_from_index(training_idx) validation = data.sampling_from_index(validation_idx) - learner, val_data = _training_helper( - learner, training, fit_learner, ensure_probabilistic=probabilistic, val_split=validation + classifier, val_data = _training_helper( + classifier, training, fit_classifier, ensure_probabilistic=probabilistic, val_split=validation ) y_.append(predict(val_data.instances)) y.append(val_data.labels) @@ -278,21 +278,21 @@ def cross_generate_predictions_depr( class_count = data.counts() # fit the learner on all data - learner, _ = _training_helper( - learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=None + classifier, _ = _training_helper( + classifier, data, fit_classifier, ensure_probabilistic=probabilistic, val_split=None ) classes = data.classes_ else: - learner, val_data = _training_helper( - learner, data, fit_learner, ensure_probabilistic=probabilistic, val_split=val_split + classifier, val_data = _training_helper( + classifier, data, fit_classifier, ensure_probabilistic=probabilistic, val_split=val_split ) y_ = predict(val_data.instances) y = val_data.labels classes = val_data.classes_ class_count = val_data.counts() - return learner, y, y_, classes, class_count + return classifier, y, y_, classes, class_count # Methods # ------------------------------------ @@ -301,22 +301,22 @@ class CC(AggregativeQuantifier): The most basic Quantification method. One that simply classifies all instances and counts how many have been attributed to each of the classes in order to compute class prevalence estimates. - :param learner: a sklearn's Estimator that generates a classifier + :param classifier: a sklearn's Estimator that generates a classifier """ - def __init__(self, learner: BaseEstimator): - self.learner = learner + def __init__(self, classifier: BaseEstimator): + self.classifier = classifier - def fit(self, data: LabelledCollection, fit_learner=True): + def fit(self, data: LabelledCollection, fit_classifier=True): """ - Trains the Classify & Count method unless `fit_learner` is False, in which case, the classifier is assumed to + Trains the Classify & Count method unless `fit_classifier` is False, in which case, the classifier is assumed to be already fit and there is nothing else to do. :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data - :param fit_learner: if False, the classifier is assumed to be fit + :param fit_classifier: if False, the classifier is assumed to be fit :return: self """ - self.learner, _ = _training_helper(self.learner, data, fit_learner) + self.classifier, _ = _training_helper(self.classifier, data, fit_classifier) return self def aggregate(self, classif_predictions: np.ndarray): @@ -335,7 +335,7 @@ class ACC(AggregativeQuantifier): the "adjusted" variant of :class:`CC`, that corrects the predictions of CC according to the `misclassification rates`. 
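For quick orientation, the renamed API exercised in the hunks above can be sketched as follows. This is illustrative only (not part of the patch): toy data stands in for a real dataset, and `train`/`test` are placeholders reused in the later sketches.

import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy.data import LabelledCollection
from quapy.method.aggregative import CC

# toy binary data (placeholder for a real dataset)
X = np.random.rand(1000, 2)
y = (X[:, 0] > 0.5).astype(int)
train, test = LabelledCollection(X, y).split_stratified(train_prop=0.6)

model = CC(LogisticRegression()).fit(train)        # classify & count
estim_prevalence = model.quantify(test.instances)  # estimated class prevalence values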
-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     :param val_split: indicates the proportion of data to be used as a stratified held-out validation
         set in which the misclassification rates are to be estimated.
         This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of
@@ -344,17 +344,17 @@ class ACC(AggregativeQuantifier):
         :class:`quapy.data.base.LabelledCollection` (the split itself).
     """

-    def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None):
-        self.learner = learner
+    def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None):
+        self.classifier = classifier
         self.val_split = val_split
         self.n_jobs = qp.get_njobs(n_jobs)

-    def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None):
+    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None):
         """
         Trains an ACC quantifier.

         :param data: the training set
-        :param fit_learner: set to False to bypass the training (the learner is assumed to be already fit)
+        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
         :param val_split: either a float in (0,1) indicating the proportion of training instances to use for
             validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
             indicating the validation set itself, or an int indicating the number `k` of folds to be used in `k`-fold
             cross validation to estimate the parameters
         :return: self
         """
         if val_split is None:
             val_split = self.val_split

-        self.learner, y, y_, classes, class_count = cross_generate_predictions(
-            data, self.learner, val_split, probabilistic=False, fit_learner=fit_learner, n_jobs=self.n_jobs
+        self.classifier, y, y_, classes, class_count = cross_generate_predictions(
+            data, self.classifier, val_split, probabilistic=False, fit_classifier=fit_classifier, n_jobs=self.n_jobs
         )

-        self.cc = CC(self.learner)
+        self.cc = CC(self.classifier)

         self.Pte_cond_estim_ = self.getPteCondEstim(data.classes_, y, y_)

         return self
@@ -422,14 +422,14 @@ class PCC(AggregativeProbabilisticQuantifier):
     `Probabilistic Classify & Count `_,
     the probabilistic variant of CC that relies on the posterior probabilities returned by a probabilistic classifier.

-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     """

-    def __init__(self, learner: BaseEstimator):
-        self.learner = learner
+    def __init__(self, classifier: BaseEstimator):
+        self.classifier = classifier

-    def fit(self, data: LabelledCollection, fit_learner=True):
-        self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
+    def fit(self, data: LabelledCollection, fit_classifier=True):
+        self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True)
         return self

     def aggregate(self, classif_posteriors):
@@ -441,7 +441,7 @@ class PACC(AggregativeProbabilisticQuantifier):
     `Probabilistic Adjusted Classify & Count `_,
     the probabilistic variant of ACC that relies on the posterior probabilities returned by a probabilistic classifier.
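The three admissible types for `val_split` documented above, in a sketch (reusing the placeholder `train` from the previous sketch; `validation_set` is a hypothetical LabelledCollection):

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import ACC

acc = ACC(LogisticRegression(), val_split=0.3).fit(train)  # hold out 30% of the training set
acc = ACC(LogisticRegression(), val_split=5).fit(train)    # estimate rates via 5-fold cross-validation
acc = ACC(LogisticRegression()).fit(train, val_split=validation_set)  # an explicit validation set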
- :param learner: a sklearn's Estimator that generates a classifier + :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -451,17 +451,17 @@ class PACC(AggregativeProbabilisticQuantifier): :param n_jobs: number of parallel workers """ - def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): - self.learner = learner + def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): + self.classifier = classifier self.val_split = val_split self.n_jobs = qp.get_njobs(n_jobs) - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ Trains a PACC quantifier. :param data: the training set - :param fit_learner: set to False to bypass the training (the learner is assumed to be already fit) + :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) :param val_split: either a float in (0,1) indicating the proportion of training instances to use for validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection indicating the validation set itself, or an int indicating the number k of folds to be used in kFCV @@ -472,11 +472,11 @@ class PACC(AggregativeProbabilisticQuantifier): if val_split is None: val_split = self.val_split - self.learner, y, y_, classes, class_count = cross_generate_predictions( - data, self.learner, val_split, probabilistic=True, fit_learner=fit_learner, n_jobs=self.n_jobs + self.classifier, y, y_, classes, class_count = cross_generate_predictions( + data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) - self.pcc = PCC(self.learner) + self.pcc = PCC(self.classifier) self.Pte_cond_estim_ = self.getPteCondEstim(classes, y, y_) return self @@ -510,7 +510,7 @@ class EMQ(AggregativeProbabilisticQuantifier): probabilities generated by a probabilistic classifier and the class prevalence estimates obtained via maximum-likelihood estimation, in a mutually recursive way, until convergence. 
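A sketch of the EMQ options introduced above (the `recalib` values are those handled in the `fit` method shown below; `train` and `test` as in the earlier sketches):

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import EMQ

emq = EMQ(LogisticRegression()).fit(train)  # EM initialized with the true training prevalence
emq = EMQ(LogisticRegression(), exact_train_prev=False, recalib='bcts').fit(train)  # estimated prior + BCTS recalibration
estim_prevalence = emq.quantify(test.instances)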
-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     :param exact_train_prev: set to True (default) for using, as the initial observation, the true training prevalence;
         or set to False for computing the training prevalence as an estimate, akin to PCC, i.e., as the expected
         value of the posterior probabilities of the training instances as suggested in
@@ -523,30 +523,32 @@ class EMQ(AggregativeProbabilisticQuantifier):
     MAX_ITER = 1000
     EPSILON = 1e-4

-    def __init__(self, learner: BaseEstimator, exact_train_prev=True, recalib=None):
-        self.learner = learner
+    def __init__(self, classifier: BaseEstimator, exact_train_prev=True, recalib=None):
+        self.classifier = classifier
         self.exact_train_prev = exact_train_prev
         self.recalib = recalib

-    def fit(self, data: LabelledCollection, fit_learner=True):
+    def fit(self, data: LabelledCollection, fit_classifier=True):
         if self.recalib is not None:
             if self.recalib == 'nbvs':
-                self.learner = NBVSCalibration(self.learner)
+                self.classifier = NBVSCalibration(self.classifier)
             elif self.recalib == 'bcts':
-                self.learner = BCTSCalibration(self.learner)
+                self.classifier = BCTSCalibration(self.classifier)
             elif self.recalib == 'ts':
-                self.learner = TSCalibration(self.learner)
+                self.classifier = TSCalibration(self.classifier)
             elif self.recalib == 'vs':
-                self.learner = VSCalibration(self.learner)
+                self.classifier = VSCalibration(self.classifier)
+            elif self.recalib == 'platt':
+                self.classifier = CalibratedClassifierCV(self.classifier, ensemble=False)
             else:
                 raise ValueError('invalid param argument for recalibration method; available ones are '
                                  '"nbvs", "bcts", "ts", "vs", and "platt".')
-        self.learner, _ = _training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
+        self.classifier, _ = _training_helper(self.classifier, data, fit_classifier, ensure_probabilistic=True)
         if self.exact_train_prev:
             self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
         else:
             self.train_prevalence = qp.model_selection.cross_val_predict(
-                quantifier=PCC(deepcopy(self.learner)),
+                quantifier=PCC(deepcopy(self.classifier)),
                 data=data,
                 nfolds=3,
                 random_state=0
@@ -558,7 +560,7 @@ class EMQ(AggregativeProbabilisticQuantifier):
         return priors

     def predict_proba(self, instances, epsilon=EPSILON):
-        classif_posteriors = self.learner.predict_proba(instances)
+        classif_posteriors = self.classifier.predict_proba(instances)
         priors, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon)
         return posteriors
@@ -611,21 +613,21 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
     class-conditional distributions of the posterior probabilities returned for the positive and negative validation
     examples, respectively. The parameters of the mixture thus represent the estimates of the class prevalence values.

-    :param learner: a sklearn's Estimator that generates a binary classifier
+    :param classifier: a sklearn's Estimator that generates a binary classifier
     :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out
         validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself).
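A minimal sketch of the (binary-only) HDy usage just documented:

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import HDy

# 40% of the training set models the class-conditional validation distributions
hdy = HDy(LogisticRegression(), val_split=0.4).fit(train)
estim_prevalence = hdy.quantify(test.instances)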
""" - def __init__(self, learner: BaseEstimator, val_split=0.4): - self.learner = learner + def __init__(self, classifier: BaseEstimator, val_split=0.4): + self.classifier = classifier self.val_split = val_split - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): """ Trains a HDy quantifier. :param data: the training set - :param fit_learner: set to False to bypass the training (the learner is assumed to be already fit) + :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit) :param val_split: either a float in (0,1) indicating the proportion of training instances to use for validation (e.g., 0.3 for using 30% of the training set as validation data), or a :class:`quapy.data.base.LabelledCollection` indicating the validation set itself @@ -635,11 +637,11 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): val_split = self.val_split self._check_binary(data, self.__class__.__name__) - self.learner, validation = _training_helper( - self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) + self.classifier, validation = _training_helper( + self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] - self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] + self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] + self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] # pre-compute the histogram for positive and negative examples self.bins = np.linspace(10, 110, 11, dtype=int) # [10, 20, 30, ..., 100, 110] self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in @@ -684,7 +686,7 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): minimizes the distance between distributions. Details for the ternary search have been got from - :param learner: a sklearn's Estimator that generates a binary classifier + :param classifier: a sklearn's Estimator that generates a binary classifier :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). :param n_bins: an int with the number of bins to use to compute the histograms. @@ -693,8 +695,8 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): :param tol: a float with the tolerance for the ternary search algorithm. 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4, n_bins=8, distance: Union[str, Callable]='HD', tol=1e-05): - self.learner = learner + def __init__(self, classifier: BaseEstimator, val_split=0.4, n_bins=8, distance: Union[str, Callable]='HD', tol=1e-05): + self.classifier = classifier self.val_split = val_split self.tol = tol self.distance = distance @@ -717,23 +719,23 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): return (left + right) / 2 def _compute_distance(self, Px_train, Px_test, distance: Union[str, Callable]='HD'): - if distance=='HD': + if distance == 'HD': return F.HellingerDistance(Px_train, Px_test) - elif distance=='topsoe': + elif distance == 'topsoe': return F.TopsoeDistance(Px_train, Px_test) else: return distance(Px_train, Px_test) - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): if val_split is None: val_split = self.val_split self._check_binary(data, self.__class__.__name__) - self.learner, validation = _training_helper( - self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) + self.classifier, validation = _training_helper( + self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] - self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] + self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] + self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] self.Pxy1_density = np.histogram(self.Pxy1, bins=self.n_bins, range=(0, 1), density=True)[0] self.Pxy0_density = np.histogram(self.Pxy0, bins=self.n_bins, range=(0, 1), density=True)[0] return self @@ -757,25 +759,25 @@ class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): SMM is a simplification of matching distribution methods where the representation of the examples is created using the mean instead of a histogram. - :param learner: a sklearn's Estimator that generates a binary classifier. + :param classifier: a sklearn's Estimator that generates a binary classifier. :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4): - self.learner = learner + def __init__(self, classifier: BaseEstimator, val_split=0.4): + self.classifier = classifier self.val_split = val_split - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, LabelledCollection] = None): + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): if val_split is None: val_split = self.val_split self._check_binary(data, self.__class__.__name__) - self.learner, validation = _training_helper( - self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split) + self.classifier, validation = _training_helper( + self.classifier, data, fit_classifier, ensure_probabilistic=True, val_split=val_split) Px = self.classify(validation.instances)[:, 1] # takes only the P(y=+1|x) - self.Pxy1 = Px[validation.labels == self.learner.classes_[1]] - self.Pxy0 = Px[validation.labels == self.learner.classes_[0]] + self.Pxy1 = Px[validation.labels == self.classifier.classes_[1]] + self.Pxy0 = Px[validation.labels == self.classifier.classes_[0]] self.Pxy1_mean = np.mean(self.Pxy1) self.Pxy0_mean = np.mean(self.Pxy0) return self @@ -809,19 +811,19 @@ class ELM(AggregativeQuantifier, BinaryQuantifier): self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME'] self.loss = loss self.kwargs = kwargs - self.learner = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) + self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) - def fit(self, data: LabelledCollection, fit_learner=True): + def fit(self, data: LabelledCollection, fit_classifier=True): self._check_binary(data, self.__class__.__name__) - assert fit_learner, 'the method requires that fit_learner=True' - self.learner.fit(data.instances, data.labels) + assert fit_classifier, 'the method requires that fit_classifier=True' + self.classifier.fit(data.instances, data.labels) return self def aggregate(self, classif_predictions: np.ndarray): return F.prevalence_from_labels(classif_predictions, self.classes_) def classify(self, X, y=None): - return self.learner.predict(X) + return self.classifier.predict(X) class SVMQ(ELM): @@ -916,7 +918,7 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): that would allow for more true positives and many more false positives, on the grounds this would deliver larger denominators. - :param learner: a sklearn's Estimator that generates a classifier + :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -925,22 +927,22 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): :class:`quapy.data.base.LabelledCollection` (the split itself). 
""" - def __init__(self, learner: BaseEstimator, val_split=0.4, n_jobs=None): - self.learner = learner + def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): + self.classifier = classifier self.val_split = val_split self.n_jobs = qp.get_njobs(n_jobs) - def fit(self, data: LabelledCollection, fit_learner=True, val_split: Union[float, int, LabelledCollection] = None): + def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): self._check_binary(data, "Threshold Optimization") if val_split is None: val_split = self.val_split - self.learner, y, y_, classes, class_count = cross_generate_predictions( - data, self.learner, val_split, probabilistic=True, fit_learner=fit_learner, n_jobs=self.n_jobs + self.classifier, y, y_, classes, class_count = cross_generate_predictions( + data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs ) - self.cc = CC(self.learner) + self.cc = CC(self.classifier) self.tpr, self.fpr = self._optimize_threshold(y, y_) @@ -1018,7 +1020,7 @@ class T50(ThresholdOptimization): for the threshold that makes `tpr` cosest to 0.5. The goal is to bring improved stability to the denominator of the adjustment. - :param learner: a sklearn's Estimator that generates a classifier + :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -1027,8 +1029,8 @@ class T50(ThresholdOptimization): :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, learner: BaseEstimator, val_split=0.4): - super().__init__(learner, val_split) + def __init__(self, classifier: BaseEstimator, val_split=0.4): + super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: return abs(tpr - 0.5) @@ -1042,7 +1044,7 @@ class MAX(ThresholdOptimization): for the threshold that maximizes `tpr-fpr`. The goal is to bring improved stability to the denominator of the adjustment. - :param learner: a sklearn's Estimator that generates a classifier + :param classifier: a sklearn's Estimator that generates a classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the misclassification rates are to be estimated. This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -1051,8 +1053,8 @@ class MAX(ThresholdOptimization): :class:`quapy.data.base.LabelledCollection` (the split itself). """ - def __init__(self, learner: BaseEstimator, val_split=0.4): - super().__init__(learner, val_split) + def __init__(self, classifier: BaseEstimator, val_split=0.4): + super().__init__(classifier, val_split) def _condition(self, tpr, fpr) -> float: # MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr) @@ -1067,7 +1069,7 @@ class X(ThresholdOptimization): for the threshold that yields `tpr=1-fpr`. The goal is to bring improved stability to the denominator of the adjustment. 
-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     :param val_split: indicates the proportion of data to be used as a stratified held-out validation
         set in which the misclassification rates are to be estimated.
         This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of
         validation data, or as an integer, indicating that the misclassification rates should be estimated via
         `k`-fold cross validation (this integer stands for the number of folds `k`), or as a
         :class:`quapy.data.base.LabelledCollection` (the split itself).
     """

-    def __init__(self, learner: BaseEstimator, val_split=0.4):
-        super().__init__(learner, val_split)
+    def __init__(self, classifier: BaseEstimator, val_split=0.4):
+        super().__init__(classifier, val_split)

     def _condition(self, tpr, fpr) -> float:
         return abs(1 - (tpr + fpr))

@@ -1091,7 +1093,7 @@ class MS(ThresholdOptimization):
     class prevalence estimates for all decision thresholds and returns the median of them all.
     The goal is to bring improved stability to the denominator of the adjustment.

-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     :param val_split: indicates the proportion of data to be used as a stratified held-out validation
         set in which the misclassification rates are to be estimated.
         This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of
         validation data, or as an integer, indicating that the misclassification rates should be estimated via
         `k`-fold cross validation (this integer stands for the number of folds `k`), or as a
         :class:`quapy.data.base.LabelledCollection` (the split itself).
     """
-    def __init__(self, learner: BaseEstimator, val_split=0.4):
-        super().__init__(learner, val_split)
+    def __init__(self, classifier: BaseEstimator, val_split=0.4):
+        super().__init__(classifier, val_split)

     def _condition(self, tpr, fpr) -> float:
         pass

@@ -1128,7 +1130,7 @@ class MS2(MS):
     which `tpr-fpr>0.25`.
     The goal is to bring improved stability to the denominator of the adjustment.

-    :param learner: a sklearn's Estimator that generates a classifier
+    :param classifier: a sklearn's Estimator that generates a classifier
     :param val_split: indicates the proportion of data to be used as a stratified held-out validation
         set in which the misclassification rates are to be estimated.
         This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of
         validation data, or as an integer, indicating that the misclassification rates should be estimated via
         `k`-fold cross validation (this integer stands for the number of folds `k`), or as a
         :class:`quapy.data.base.LabelledCollection` (the split itself).
     """
-    def __init__(self, learner: BaseEstimator, val_split=0.4):
-        super().__init__(learner, val_split)
+    def __init__(self, classifier: BaseEstimator, val_split=0.4):
+        super().__init__(classifier, val_split)

     def _optimize_threshold(self, y, probabilities):
         tprs = [0, 1]
@@ -1174,7 +1176,8 @@ class OneVsAll(AggregativeQuantifier):
     This variant was used, along with the :class:`EMQ` quantifier, in
     `Gao and Sebastiani, 2016 `_.
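A sketch of the one-vs-all wrapper described above; `multiclass_train` and `multiclass_test` are placeholder splits of a LabelledCollection with more than two classes:

from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import HDy, OneVsAll

# lifts a binary quantifier (here HDy) to the multiclass setting
ova = OneVsAll(HDy(LogisticRegression()), n_jobs=-1).fit(multiclass_train)
estim_prevalence = ova.quantify(multiclass_test.instances)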
-    :param learner: a sklearn's Estimator that generates a binary classifier
+    :param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass models in a
+        one-vs-all manner
     :param n_jobs: number of parallel workers
     """

@@ -1186,11 +1189,11 @@
         self.binary_quantifier = binary_quantifier
         self.n_jobs = qp.get_njobs(n_jobs)

-    def fit(self, data: LabelledCollection, fit_learner=True):
+    def fit(self, data: LabelledCollection, fit_classifier=True):
         assert not data.binary, \
             f'{self.__class__.__name__} expects non-binary data'
-        assert fit_learner == True, \
-            'fit_learner must be True'
+        assert fit_classifier == True, \
+            'fit_classifier must be True'

         self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
         self.__parallel(self._delayed_binary_fit, data)
diff --git a/quapy/method/base.py b/quapy/method/base.py
index c935735..459130c 100644
--- a/quapy/method/base.py
+++ b/quapy/method/base.py
@@ -1,12 +1,15 @@
 from abc import ABCMeta, abstractmethod
 from copy import deepcopy
+
+from sklearn.base import BaseEstimator
+
 import quapy as qp
 from quapy.data import LabelledCollection

 # Base Quantifier abstract class
 # ------------------------------------
-class BaseQuantifier(metaclass=ABCMeta):
+class BaseQuantifier(BaseEstimator):
     """
     Abstract Quantifier. A quantifier is defined as an object of a class that implements the method :meth:`fit` on
     :class:`quapy.data.base.LabelledCollection`, the method :meth:`quantify`, and the :meth:`set_params` and
@@ -33,24 +36,24 @@
         """
         ...

-    @abstractmethod
-    def set_params(self, **parameters):
-        """
-        Set the parameters of the quantifier.
-
-        :param parameters: dictionary of param-value pairs
-        """
-        ...
-
-    @abstractmethod
-    def get_params(self, deep=True):
-        """
-        Return the current parameters of the quantifier.
-
-        :param deep: for compatibility with sklearn
-        :return: a dictionary of param-value pairs
-        """
-        ...
+    # @abstractmethod
+    # def set_params(self, **parameters):
+    #     """
+    #     Set the parameters of the quantifier.
+    #
+    #     :param parameters: dictionary of param-value pairs
+    #     """
+    #     ...
+    #
+    # @abstractmethod
+    # def get_params(self, deep=True):
+    #     """
+    #     Return the current parameters of the quantifier.
+    #
+    #     :param deep: for compatibility with sklearn
+    #     :return: a dictionary of param-value pairs
+    #     """
+    #     ...

 class BinaryQuantifier(BaseQuantifier):
@@ -67,7 +70,7 @@
 class OneVsAllGeneric:
     """
     Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
-    quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1.
+    quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.
""" def __init__(self, binary_quantifier, n_jobs=None): @@ -103,11 +106,11 @@ class OneVsAllGeneric: def get_params(self, deep=True): return self.binary_quantifier.get_params() - def _delayed_binary_predict(self, c, learners, X): - return learners[c].quantify(X)[:,1] # the mean is the estimation for the positive class prevalence + def _delayed_binary_predict(self, c, quantifiers, X): + return quantifiers[c].quantify(X)[:, 1] # the mean is the estimation for the positive class prevalence - def _delayed_binary_fit(self, c, learners, data, **kwargs): + def _delayed_binary_fit(self, c, quantifiers, data, **kwargs): bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2) - learners[c].fit(bindata, **kwargs) + quantifiers[c].fit(bindata, **kwargs) diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 5e084e5..82d3a35 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -146,7 +146,7 @@ class Ensemble(BaseQuantifier): This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility with the abstract class). Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or - `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a learner `l` optimized for + `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for classification (not recommended). :param parameters: dictionary @@ -154,7 +154,7 @@ class Ensemble(BaseQuantifier): """ raise NotImplementedError(f'{self.__class__.__name__} should not be used within GridSearchQ; ' f'instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), ' - f'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a learner ' + f'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier ' f'l optimized for classification (not recommended).') def get_params(self, deep=True): @@ -162,7 +162,7 @@ class Ensemble(BaseQuantifier): This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility with the abstract class). Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or - `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a learner `l` optimized for + `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for classification (not recommended). 
:return: raises an Exception @@ -326,18 +326,18 @@ def _draw_simplex(ndim, min_val, max_trials=100): f'>= {min_val} is unlikely (it failed after {max_trials} trials)') -def _instantiate_ensemble(learner, base_quantifier_class, param_grid, optim, param_model_sel, **kwargs): +def _instantiate_ensemble(classifier, base_quantifier_class, param_grid, optim, param_model_sel, **kwargs): if optim is None: - base_quantifier = base_quantifier_class(learner) + base_quantifier = base_quantifier_class(classifier) elif optim in qp.error.CLASSIFICATION_ERROR: if optim == qp.error.f1e: scoring = make_scorer(f1_score) elif optim == qp.error.acce: scoring = make_scorer(accuracy_score) - learner = GridSearchCV(learner, param_grid, scoring=scoring) - base_quantifier = base_quantifier_class(learner) + classifier = GridSearchCV(classifier, param_grid, scoring=scoring) + base_quantifier = base_quantifier_class(classifier) else: - base_quantifier = GridSearchQ(base_quantifier_class(learner), + base_quantifier = GridSearchQ(base_quantifier_class(classifier), param_grid=param_grid, **param_model_sel, error=optim) @@ -357,7 +357,7 @@ def _check_error(error): f'the name of an error function in {qp.error.ERROR_NAMES}') -def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None, +def ensembleFactory(classifier, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None, **kwargs): """ Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model @@ -390,7 +390,7 @@ def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, >>> >>> ensembleFactory(LogisticRegression(), PACC, optim='mae', policy='mae', **common) - :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param base_quantifier_class: a class of quantifiers :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it @@ -405,21 +405,21 @@ def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, if param_model_sel is None: raise ValueError(f'param_model_sel is None but optim was requested.') error = _check_error(optim) - return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs) + return _instantiate_ensemble(classifier, base_quantifier_class, param_grid, error, param_model_sel, **kwargs) -def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): +def ECC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs): """ Implements an ensemble of :class:`quapy.method.aggregative.CC` quantifiers, as used by `Pérez-Gállego et al., 2019 `_. Equivalent to: - >>> ensembleFactory(learner, CC, param_grid, optim, param_mod_sel, **kwargs) + >>> ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs) See :meth:`ensembleFactory` for further details. 
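A sketch of the simplest ECC usage (no model selection); the `size` and `policy` keyword arguments are assumed to be forwarded to the underlying :class:`Ensemble` constructor, and `train`/`test` are the placeholder splits from the earlier sketches:

from sklearn.linear_model import LogisticRegression
from quapy.method.meta import ECC

# ensemble of 30 CC quantifiers whose predictions are averaged
ecc = ECC(LogisticRegression(), size=30, policy='ave', n_jobs=-1).fit(train)
estim_prevalence = ecc.quantify(test.instances)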
- :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it :param param_model_sel: a dictionary containing any keyworded argument to pass to @@ -428,21 +428,21 @@ def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): :return: an instance of :class:`Ensemble` """ - return ensembleFactory(learner, CC, param_grid, optim, param_mod_sel, **kwargs) + return ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs) -def EACC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): +def EACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs): """ Implements an ensemble of :class:`quapy.method.aggregative.ACC` quantifiers, as used by `Pérez-Gállego et al., 2019 `_. Equivalent to: - >>> ensembleFactory(learner, ACC, param_grid, optim, param_mod_sel, **kwargs) + >>> ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs) See :meth:`ensembleFactory` for further details. - :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it :param param_model_sel: a dictionary containing any keyworded argument to pass to @@ -451,20 +451,20 @@ def EACC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): :return: an instance of :class:`Ensemble` """ - return ensembleFactory(learner, ACC, param_grid, optim, param_mod_sel, **kwargs) + return ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs) -def EPACC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): +def EPACC(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs): """ Implements an ensemble of :class:`quapy.method.aggregative.PACC` quantifiers. Equivalent to: - >>> ensembleFactory(learner, PACC, param_grid, optim, param_mod_sel, **kwargs) + >>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs) See :meth:`ensembleFactory` for further details. - :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it :param param_model_sel: a dictionary containing any keyworded argument to pass to @@ -473,21 +473,21 @@ def EPACC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): :return: an instance of :class:`Ensemble` """ - return ensembleFactory(learner, PACC, param_grid, optim, param_mod_sel, **kwargs) + return ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs) -def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): +def EHDy(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs): """ Implements an ensemble of :class:`quapy.method.aggregative.HDy` quantifiers, as used by `Pérez-Gállego et al., 2019 `_. 
Equivalent to: - >>> ensembleFactory(learner, HDy, param_grid, optim, param_mod_sel, **kwargs) + >>> ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs) See :meth:`ensembleFactory` for further details. - :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it :param param_model_sel: a dictionary containing any keyworded argument to pass to @@ -496,20 +496,20 @@ def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): :return: an instance of :class:`Ensemble` """ - return ensembleFactory(learner, HDy, param_grid, optim, param_mod_sel, **kwargs) + return ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs) -def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): +def EEMQ(classifier, param_grid=None, optim=None, param_mod_sel=None, **kwargs): """ Implements an ensemble of :class:`quapy.method.aggregative.EMQ` quantifiers. Equivalent to: - >>> ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs) + >>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs) See :meth:`ensembleFactory` for further details. - :param learner: sklearn's Estimator that generates a classifier + :param classifier: sklearn's Estimator that generates a classifier :param param_grid: a dictionary with the grid of parameters to optimize for :param optim: a valid quantification or classification error, or a string name of it :param param_model_sel: a dictionary containing any keyworded argument to pass to @@ -518,4 +518,4 @@ def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): :return: an instance of :class:`Ensemble` """ - return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs) + return ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs) diff --git a/quapy/method/neural.py b/quapy/method/neural.py index 0665634..1871ff0 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -31,14 +31,14 @@ class QuaNetTrainer(BaseQuantifier): >>> >>> # the text classifier is a CNN trained by NeuralClassifierTrainer >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) - >>> learner = NeuralClassifierTrainer(cnn, device='cuda') + >>> classifier = NeuralClassifierTrainer(cnn, device='cuda') >>> >>> # train QuaNet (QuaNet is an alias to QuaNetTrainer) - >>> model = QuaNet(learner, qp.environ['SAMPLE_SIZE'], device='cuda') + >>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda') >>> model.fit(dataset.training) >>> estim_prevalence = model.quantify(dataset.test.instances) - :param learner: an object implementing `fit` (i.e., that can be trained on labelled data), + :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data), `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and `transform` (i.e., that can generate embedded representations of the unlabelled instances). 
:param sample_size: integer, the sample size @@ -60,7 +60,7 @@ class QuaNetTrainer(BaseQuantifier): """ def __init__(self, - learner, + classifier, sample_size, n_epochs=100, tr_iter_per_poch=500, @@ -76,13 +76,13 @@ class QuaNetTrainer(BaseQuantifier): checkpointname=None, device='cuda'): - assert hasattr(learner, 'transform'), \ - f'the learner {learner.__class__.__name__} does not seem to be able to produce document embeddings ' \ + assert hasattr(classifier, 'transform'), \ + f'the classifier {classifier.__class__.__name__} does not seem to be able to produce document embeddings ' \ f'since it does not implement the method "transform"' - assert hasattr(learner, 'predict_proba'), \ - f'the learner {learner.__class__.__name__} does not seem to be able to produce posterior probabilities ' \ + assert hasattr(classifier, 'predict_proba'), \ + f'the classifier {classifier.__class__.__name__} does not seem to be able to produce posterior probabilities ' \ f'since it does not implement the method "predict_proba"' - self.learner = learner + self.classifier = classifier self.sample_size = sample_size self.n_epochs = n_epochs self.tr_iter = tr_iter_per_poch @@ -105,26 +105,26 @@ class QuaNetTrainer(BaseQuantifier): self.checkpoint = os.path.join(checkpointdir, checkpointname) self.device = torch.device(device) - self.__check_params_colision(self.quanet_params, self.learner.get_params()) + self.__check_params_colision(self.quanet_params, self.classifier.get_params()) self._classes_ = None - def fit(self, data: LabelledCollection, fit_learner=True): + def fit(self, data: LabelledCollection, fit_classifier=True): """ Trains QuaNet. - :param data: the training data on which to train QuaNet. If `fit_learner=True`, the data will be split in + :param data: the training data on which to train QuaNet. If `fit_classifier=True`, the data will be split in 40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If - `fit_learner=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively. - :param fit_learner: if True, trains the classifier on a split containing 40% of the data + `fit_classifier=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively. 
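The split arithmetic implied by the 40/40/20 and 66/34 figures above, spelled out (0.66 is chosen precisely so that splitting the remaining 60% recovers an approximate 40/20 partition):

classifier_frac = 0.4                            # fit_classifier=True: 40% for the classifier
quanet_frac = 0.66 * (1 - classifier_frac)       # = 0.396, i.e., ~40% for training QuaNet
valid_frac = (1 - 0.66) * (1 - classifier_frac)  # = 0.204, i.e., ~20% for validating QuaNet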
+ :param fit_classifier: if True, trains the classifier on a split containing 40% of the data :return: self """ self._classes_ = data.classes_ os.makedirs(self.checkpointdir, exist_ok=True) - if fit_learner: + if fit_classifier: classifier_data, unused_data = data.split_stratified(0.4) train_data, valid_data = unused_data.split_stratified(0.66) # 0.66 split of 60% makes 40% and 20% - self.learner.fit(*classifier_data.Xy) + self.classifier.fit(*classifier_data.Xy) else: classifier_data = None train_data, valid_data = data.split_stratified(0.66) @@ -133,21 +133,21 @@ class QuaNetTrainer(BaseQuantifier): self.tr_prev = data.prevalence() # compute the posterior probabilities of the instances - valid_posteriors = self.learner.predict_proba(valid_data.instances) - train_posteriors = self.learner.predict_proba(train_data.instances) + valid_posteriors = self.classifier.predict_proba(valid_data.instances) + train_posteriors = self.classifier.predict_proba(train_data.instances) # turn instances' original representations into embeddings - valid_data_embed = LabelledCollection(self.learner.transform(valid_data.instances), valid_data.labels, self._classes_) - train_data_embed = LabelledCollection(self.learner.transform(train_data.instances), train_data.labels, self._classes_) + valid_data_embed = LabelledCollection(self.classifier.transform(valid_data.instances), valid_data.labels, self._classes_) + train_data_embed = LabelledCollection(self.classifier.transform(train_data.instances), train_data.labels, self._classes_) self.quantifiers = { - 'cc': CC(self.learner).fit(None, fit_learner=False), - 'acc': ACC(self.learner).fit(None, fit_learner=False, val_split=valid_data), - 'pcc': PCC(self.learner).fit(None, fit_learner=False), - 'pacc': PACC(self.learner).fit(None, fit_learner=False, val_split=valid_data), + 'cc': CC(self.classifier).fit(None, fit_classifier=False), + 'acc': ACC(self.classifier).fit(None, fit_classifier=False, val_split=valid_data), + 'pcc': PCC(self.classifier).fit(None, fit_classifier=False), + 'pacc': PACC(self.classifier).fit(None, fit_classifier=False, val_split=valid_data), } if classifier_data is not None: - self.quantifiers['emq'] = EMQ(self.learner).fit(classifier_data, fit_learner=False) + self.quantifiers['emq'] = EMQ(self.classifier).fit(classifier_data, fit_classifier=False) self.status = { 'tr-loss': -1, @@ -199,8 +199,8 @@ class QuaNetTrainer(BaseQuantifier): return prevs_estim def quantify(self, instances): - posteriors = self.learner.predict_proba(instances) - embeddings = self.learner.transform(instances) + posteriors = self.classifier.predict_proba(instances) + embeddings = self.classifier.transform(instances) quant_estims = self._get_aggregative_estims(posteriors) self.quanet.eval() with torch.no_grad(): @@ -264,7 +264,7 @@ class QuaNetTrainer(BaseQuantifier): f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}') def get_params(self, deep=True): - return {**self.learner.get_params(), **self.quanet_params} + return {**self.classifier.get_params(), **self.quanet_params} def set_params(self, **parameters): learner_params = {} @@ -273,7 +273,7 @@ class QuaNetTrainer(BaseQuantifier): self.quanet_params[key] = val else: learner_params[key] = val - self.learner.set_params(**learner_params) + self.classifier.set_params(**learner_params) def __check_params_colision(self, quanet_params, learner_params): quanet_keys = set(quanet_params.keys()) @@ -281,7 +281,7 @@ class QuaNetTrainer(BaseQuantifier): intersection = quanet_keys.intersection(learner_keys) if 
len(intersection) > 0:
             raise ValueError(f'the use of parameters {intersection} is ambiguous since those can refer to '
-                             f'the parameters of QuaNet or the learner {self.learner.__class__.__name__}')
+                             f'the parameters of QuaNet or the classifier {self.classifier.__class__.__name__}')

     def clean_checkpoint(self):
         """
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index f7c5b94..3cb22c7 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -88,7 +88,12 @@ class GridSearchQ(BaseQuantifier):
         hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)]

         # pass a seed to parallel so it is set in child processes
-        scores = qp.util.parallel(self._delayed_eval, ((params, training) for params in hyper), seed=qp.environ.get('_R_SEED', None), n_jobs=self.n_jobs)
+        scores = qp.util.parallel(
+            self._delayed_eval,
+            ((params, training) for params in hyper),
+            seed=qp.environ.get('_R_SEED', None),
+            n_jobs=self.n_jobs
+        )

         for params, score, model in scores:
             if score is not None:
@@ -103,7 +108,7 @@ class GridSearchQ(BaseQuantifier):
         tend = time()-tinit

         if self.best_score_ is None:
-            raise TimeoutError('all jobs took more than the timeout time to end')
+            raise TimeoutError('no combination of hyperparameters seems to work')

         self._sout(f'optimization finished: best params {self.best_params_} (score={self.best_score_:.5f}) '
                    f'[took {tend:.4f}s]')
@@ -150,6 +155,13 @@ class GridSearchQ(BaseQuantifier):
             except TimeoutError:
                 self._sout(f'timeout ({self.timeout}s) reached for config {params}')
                 score = None
+            except ValueError as e:
+                self._sout(f'the combination of hyperparameters {params} is invalid')
+                raise e
+            except Exception as e:
+                self._sout(f'something went wrong for config {params}; skipping:')
+                self._sout(f'\tException: {e}')
+                score = None

         return params, score, model

From ceb88792c5669dea84de244783570c1e37f627d6 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Tue, 31 Jan 2023 15:08:58 +0100
Subject: [PATCH 47/59] added DistributionMatching method, a generic model for
 distribution matching for multiclass quantification problems that takes the
 divergence and number of bins as hyperparameters

---
 quapy/CHANGE_LOG.txt | 16 +++-
 quapy/method/aggregative.py | 158 ++++++++++++++++++++++++++++--------
 2 files changed, 136 insertions(+), 38 deletions(-)

diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt
index c450b41..f2deea0 100644
--- a/quapy/CHANGE_LOG.txt
+++ b/quapy/CHANGE_LOG.txt
@@ -25,9 +25,9 @@

 - cross_val_predict (for quantification) added to model_selection: would be nice to allow the user to specify a
 test protocol maybe, or None for bypassing it?

-- I think Pablo added DyS, Topsoe distance and binary search.
+- DyS, Topsoe distance and binary search (thanks to Pablo González)

-- I think Pablo added multi-thread reproducibility.
+- Multi-thread reproducibility via seeding (thanks to Pablo González)

 - Bugfix: adding two labelled collections (with +) now checks for consistency in the classes

@@ -40,8 +40,16 @@

 - the internal classifier of aggregative methods is now called "classifier" instead of "learner"

- when optimizing the hyperparameters of an aggregative quantifier, the classifier's specific hyperparameters
- should be marked with a "classifier__" prefix (just like in scikit-learn), while the quantifier's specific
- hyperparameters are named directly. For example, PCC(LogisticRegression()) quantifier has
+ should be marked with a "classifier__" prefix (just like in scikit-learn with estimators), while the quantifier's
+ specific hyperparameters are named directly. For example, the PCC(LogisticRegression()) quantifier has hyperparameters
+ "classifier__C", "classifier__class_weight", etc., instead of "C" and "class_weight" as in v0.1.6
+ (a usage sketch is given further below).
+
+- hyperparameter combinations leading to inconsistent runs raise a ValueError exception, while combinations
+ leading to internal errors of surrogate functions are reported and skipped, without stopping the grid search.
+
+- DistributionMatching methods added. This is a general framework for distribution matching methods that caters for
+ multiclass quantification. That is to say, one could get a multiclass variant of the (originally binary) HDy
+ method aligned with Firat's formulation.

 Things to fix:
 - calibration with recalibration methods has to be fixed for exact_train_prev in EMQ (conflicts with clone, deepcopy, etc.)
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 3246b9f..87b682e 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -3,6 +3,7 @@ from copy import deepcopy
 from typing import Callable, Union
 import numpy as np
 from joblib import Parallel, delayed
+from scipy import optimize
 from sklearn.base import BaseEstimator, clone
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import confusion_matrix
@@ -10,8 +11,7 @@ from sklearn.model_selection import StratifiedKFold, cross_val_predict
 from tqdm import tqdm
 import quapy as qp
 import quapy.functional as F
-from classification.calibration import RecalibratedProbabilisticClassifier, NBVSCalibration, BCTSCalibration, TSCalibration, \
-    VSCalibration
+from quapy.classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration
 from quapy.classification.svmperf import SVMperf
 from quapy.data import LabelledCollection
 from quapy.method.base import BaseQuantifier, BinaryQuantifier
@@ -91,25 +91,6 @@ class AggregativeQuantifier(BaseQuantifier):
         """
         ...

-    # def get_params(self, deep=True):
-    #     """
-    #     Return the current parameters of the quantifier.
-    #
-    #     :param deep: for compatibility with sklearn
-    #     :return: a dictionary of param-value pairs
-    #     """
-    #
-    #     return self.learner.get_params()

-    # def set_params(self, **parameters):
-    #     """
-    #     Set the parameters of the quantifier.
-    #
-    #     :param parameters: dictionary of param-value pairs
-    #     """
-    #
-    #     self.learner.set_params(**parameters)

     @property
     def classes_(self):
@@ -690,16 +671,16 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier):
     :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out
         validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself).
     :param n_bins: an int with the number of bins to use to compute the histograms.
-    :param distance: a str with a distance already included in the library (HD or topsoe), or a function
-        that computes the distance between two distributions.
+    :param divergence: a str indicating the name of the divergence (currently supported: "HD" and "topsoe"), or a
+        callable function that computes the divergence between two distributions (two equally sized arrays).
     :param tol: a float with the tolerance for the ternary search algorithm.
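The "classifier__" naming convention recorded in the change log above can be sketched as follows; the protocol-based GridSearchQ call reflects the API this patch series is converging to, so treat the exact signature as an assumption (`train`/`test` are the placeholder splits from the earlier sketches):

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PCC

param_grid = {'classifier__C': [0.1, 1, 10]}  # prefixed params reach the LogisticRegression
model = qp.model_selection.GridSearchQ(
    model=PCC(LogisticRegression()),
    param_grid=param_grid,
    protocol=qp.protocol.APP(test, sample_size=100),
    error='mae'
).fit(train)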
""" - def __init__(self, classifier: BaseEstimator, val_split=0.4, n_bins=8, distance: Union[str, Callable]='HD', tol=1e-05): + def __init__(self, classifier: BaseEstimator, val_split=0.4, n_bins=8, divergence: Union[str, Callable]= 'HD', tol=1e-05): self.classifier = classifier self.val_split = val_split self.tol = tol - self.distance = distance + self.divergence = divergence self.n_bins = n_bins def _ternary_search(self, f, left, right, tol): @@ -718,14 +699,6 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): # Left and right are the current bounds; the maximum is between them return (left + right) / 2 - def _compute_distance(self, Px_train, Px_test, distance: Union[str, Callable]='HD'): - if distance == 'HD': - return F.HellingerDistance(Px_train, Px_test) - elif distance == 'topsoe': - return F.TopsoeDistance(Px_train, Px_test) - else: - return distance(Px_train, Px_test) - def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None): if val_split is None: val_split = self.val_split @@ -744,10 +717,11 @@ class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): Px = classif_posteriors[:, 1] # takes only the P(y=+1|x) Px_test = np.histogram(Px, bins=self.n_bins, range=(0, 1), density=True)[0] + divergence = _get_divergence(self.divergence) def distribution_distance(prev): Px_train = prev * self.Pxy1_density + (1 - prev) * self.Pxy0_density - return self._compute_distance(Px_train,Px_test,self.distance) + return divergence(Px_train, Px_test) class1_prev = self._ternary_search(f=distribution_distance, left=0, right=1, tol=self.tol) return np.asarray([1 - class1_prev, class1_prev]) @@ -791,6 +765,122 @@ class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) +def _get_divergence(divergence: Union[str, Callable]): + if isinstance(divergence, str): + if divergence=='HD': + return F.HellingerDistance + elif divergence=='topsoe': + return F.TopsoeDistance + else: + raise ValueError(f'unknown divergence {divergence}') + elif callable(divergence): + return divergence + else: + raise ValueError(f'argument "divergence" not understood; use a str or a callable function') + +class DistributionMatching(AggregativeProbabilisticQuantifier): + """ + Generic Distribution Matching quantifier for binary or multiclass quantification. + This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters. + + :param classifier: a sklearn's Estimator that generates a probabilistic classifier + :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the + validation distribution. + This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of + validation data, or as an integer, indicating that the validation distribution should be estimated via + `k`-fold cross validation (this integer stands for the number of folds `k`), or as a + :class:`quapy.data.base.LabelledCollection` (the split itself). 
+
+    def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD', cdf=False, n_jobs=None):
+
+        self.classifier = classifier
+        self.val_split = val_split
+        self.nbins = nbins
+        self.divergence = divergence
+        self.cdf = cdf
+        self.n_jobs = n_jobs
+
+    def __get_distributions(self, posteriors):
+        histograms = []
+        post_dims = posteriors.shape[1]
+        if post_dims == 2:
+            # in binary quantification we can use only one class, since the other one is its complement
+            post_dims = 1
+        for dim in range(post_dims):
+            hist = np.histogram(posteriors[:, dim], bins=self.nbins, range=(0, 1))[0]
+            histograms.append(hist)
+
+        counts = np.vstack(histograms)
+        distributions = counts / counts.sum(axis=1)[:, np.newaxis]
+        if self.cdf:
+            distributions = np.cumsum(distributions, axis=1)
+        return distributions
+
+    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
+        """
+        Trains the classifier (if requested) and generates the validation distributions out of the training data.
+        The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
+        channels, and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
+        are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
+        distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
+        is the fraction of instances with a value in the `k`-th bin.
+
+        :param data: the training set
+        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
+        :param val_split: either a float in (0,1) indicating the proportion of training instances to use for
+            validation (e.g., 0.3 for using 30% of the training set as validation data), or a LabelledCollection
+            indicating the validation set itself, or an int indicating the number `k` of folds to be used in kFCV
+            to estimate the parameters
+        """
+        if val_split is None:
+            val_split = self.val_split
+
+        self.classifier, y, posteriors, classes, class_count = cross_generate_predictions(
+            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
+        )
+
+        self.validation_distribution = np.asarray(
+            [self.__get_distributions(posteriors[y == cat]) for cat in range(data.n_classes)]
+        )
+
+        return self
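The shape contract described in fit's docstring can be checked in isolation; the following standalone sketch mirrors what __get_distributions computes for the validation data of a single class (three classes and the default 8 bins are assumed):

    import numpy as np

    n_classes, nbins = 3, 8
    rng = np.random.default_rng(0)

    # fake posterior probabilities for 100 validation instances of one class
    posteriors = rng.dirichlet(np.ones(n_classes), size=100)

    # one "channel" per class: bin each posterior dimension P(Y=j|x) over [0,1] and normalize rows
    counts = np.vstack([np.histogram(posteriors[:, j], bins=nbins, range=(0, 1))[0]
                        for j in range(n_classes)])
    channels = counts / counts.sum(axis=1, keepdims=True)
    print(channels.shape)  # (3, 8), i.e., (ch, nbins); stacking one such matrix per class gives (n, ch, nbins)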
+
+    def aggregate(self, posteriors: np.ndarray):
+        """
+        Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution
+        (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.
+        In the multiclass case, with `n` the number of classes, the test and mixture distributions contain
+        `n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed
+        independently. The matching is computed as an average of the divergence across all channels.
+
+        :param posteriors: posterior probabilities of the instances in the sample
+        :return: a vector of class prevalence estimates
+        """
+        test_distribution = self.__get_distributions(posteriors)
+        divergence = _get_divergence(self.divergence)
+        n_classes, n_channels, nbins = self.validation_distribution.shape
+
+        def match(prev):
+            prev = np.expand_dims(prev, axis=0)
+            mixture_distribution = (prev @ self.validation_distribution.reshape(n_classes, -1)).reshape(n_channels, -1)
+            divs = [divergence(test_distribution[ch], mixture_distribution[ch]) for ch in range(n_channels)]
+            return np.mean(divs)
+
+        # the initial point is set as the uniform distribution
+        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
+
+        # solutions are bounded to those contained in the unit-simplex
+        bounds = tuple((0, 1) for x in range(n_classes))  # values in [0,1]
+        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
+        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
+        return r.x
+

 class ELM(AggregativeQuantifier, BinaryQuantifier):
     """
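The constrained search performed by aggregate() is plain SciPy; here is the same pattern in isolation, with a dummy quadratic loss standing in for the divergence matching (all names in this sketch are illustrative):

    import numpy as np
    from scipy import optimize

    def solve_on_simplex(loss, n_classes):
        # start from the uniform prevalence vector and let SLSQP move it within
        # the probability simplex: each component in [0,1] and the components summing to 1
        x0 = np.full(n_classes, 1 / n_classes)
        bounds = tuple((0, 1) for _ in range(n_classes))
        constraints = {'type': 'eq', 'fun': lambda x: 1 - x.sum()}
        r = optimize.minimize(loss, x0=x0, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x

    target = np.asarray([0.2, 0.3, 0.5])
    print(solve_on_simplex(lambda p: ((p - target) ** 2).sum(), n_classes=3))  # ~[0.2 0.3 0.5]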
From 2485117f05d2bca08f187ab2cb0b46961d4b1f2c Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Wed, 8 Feb 2023 19:06:53 +0100
Subject: [PATCH 48/59] adding documentation and adding one new example

---
 docs/build/html/Datasets.html | 555 +-
 docs/build/html/Evaluation.html | 57 +-
 docs/build/html/Installation.html | 57 +-
 docs/build/html/Methods.html | 99 +-
 docs/build/html/Model-Selection.html | 65 +-
 docs/build/html/Plotting.html | 71 +-
 docs/build/html/_sources/index.rst.txt | 2 +-
 docs/build/html/_sources/quapy.classification.rst.txt | 23 +-
 docs/build/html/_sources/quapy.data.rst.txt | 18 +-
 docs/build/html/_sources/quapy.method.rst.txt | 22 +-
 docs/build/html/_sources/quapy.rst.txt | 58 +-
 docs/build/html/_sources/quapy.tests.rst.txt | 37 -
 docs/build/html/_sources/readme.rst.txt | 7 -
 docs/build/html/_sources/readme2.md.txt | 1 -
 docs/build/html/_static/alabaster.css | 701 -
 docs/build/html/_static/basic.css | 62 +-
 docs/build/html/_static/bizstyle.css | 2 +
 docs/build/html/_static/bizstyle.js | 43 +-
 docs/build/html/_static/custom.css | 1 -
 docs/build/html/_static/doctools.js | 373 +-
 docs/build/html/_static/documentation_options.js | 8 +-
 docs/build/html/_static/jquery-3.5.1.js | 10872 ----------------
 docs/build/html/_static/jquery.js | 4 +-
 docs/build/html/_static/language_data.js | 102 +-
 docs/build/html/_static/searchtools.js | 808 +-
 docs/build/html/genindex.html | 319 +-
 docs/build/html/index.html | 45 +-
 docs/build/html/modules.html | 78 +-
 docs/build/html/objects.inv | Bin 2591 -> 2873 bytes
 docs/build/html/py-modindex.html | 24 +-
 docs/build/html/quapy.classification.html | 448 +-
 docs/build/html/quapy.data.html | 482 +-
 docs/build/html/quapy.html | 1076 +-
 docs/build/html/quapy.method.html | 1308 +-
 docs/build/html/quapy.tests.html | 135 -
 docs/build/html/readme.html | 129 -
 docs/build/html/readme2.html | 92 -
 docs/build/html/search.html | 12 +-
 docs/build/html/searchindex.js | 2 +-
 examples/custom_quantifier.py | 69 +
 quapy/CHANGE_LOG.txt | 78 +-
 quapy/__init__.py | 21 +-
 quapy/classification/calibration.py | 77 +-
 quapy/classification/svmperf.py | 1 +
 quapy/data/datasets.py | 24 +
 quapy/data/preprocessing.py | 9 +-
 quapy/depr_evaluation.py | 439 -
 quapy/error.py | 5 -
 quapy/functional.py | 27 +-
 quapy/method/aggregative.py | 58 +-
 quapy/method/base.py | 23 +-
 quapy/method/meta.py | 8 +-
 quapy/method/non_aggregative.py | 27 -
 quapy/model_selection.py | 2 +-
 quapy/protocol.py | 69 +-
 quapy/tests/test_methods.py | 17 +-
 quapy/tests/test_modsel.py | 22 +-
 quapy/tests/test_protocols.py | 6 +-
 quapy/util.py | 18 +-
 59 files changed, 3593 insertions(+), 15605 deletions(-)
 delete mode 100644 docs/build/html/_sources/quapy.tests.rst.txt
 delete mode 100644 docs/build/html/_sources/readme.rst.txt
 delete mode 100644 docs/build/html/_sources/readme2.md.txt
 delete mode 100644 docs/build/html/_static/alabaster.css
 delete mode 100644 docs/build/html/_static/custom.css
 delete mode 100644 docs/build/html/_static/jquery-3.5.1.js
 delete mode 100644 docs/build/html/quapy.tests.html
 delete mode 100644 docs/build/html/readme.html
 delete mode 100644 docs/build/html/readme2.html
 create mode 100644 examples/custom_quantifier.py
 delete mode 100644 quapy/depr_evaluation.py

diff --git a/docs/build/html/Datasets.html b/docs/build/html/Datasets.html
index 6af836e..9c9eaa7 100644
--- a/docs/build/html/Datasets.html
+++ b/docs/build/html/Datasets.html
[regenerated Sphinx markup omitted; the visible change bumps the page title from "Datasets — QuaPy 0.1.6 documentation" to "Datasets — QuaPy 0.1.7 documentation"]
[The remainder of this patch is regenerated Sphinx build output, collapsed here. The deleted pages were pure build artifacts: quapy.tests.html (the API page for the quapy.tests package, listing the quapy.tests.test_base, quapy.tests.test_datasets and quapy.tests.test_methods modules), readme.html (a "Getting Started" stub reading "QuaPy is an open source framework for Quantification (a.k.a. Supervised Prevalence Estimation) written in Python", followed by the installation snippet ">>> pip install quapy"), and readme2.html (an untitled stub that merely included ../../README.md). The remaining hunks bump every page title from "QuaPy 0.1.6 documentation" to "QuaPy 0.1.7 documentation" (e.g., in search.html) and regenerate the minified searchindex.js for the v0.1.7 API.]
"interv": [1, 5, 8], "n_prevpoint": [1, 4, 5, 8], "determin": [1, 4, 5, 8], "constrain": [1, 5, 8, 10], "obtain": [1, 4, 8, 9, 11], "66": [1, 11], "given": [1, 3, 4, 8, 9, 10, 11], "num_prevalence_combin": [1, 8], "21": [1, 3, 5, 8], "n_class": [1, 3, 8, 9, 10, 11], "n_repeat": [1, 8], "1771": 1, "note": [1, 3, 4, 5, 8, 10], "last": [1, 3, 5, 8, 9, 10], "typic": [1, 4, 5, 8, 9, 10, 11], "singl": [1, 3, 6, 11], "higher": [1, 5], "comput": [1, 3, 5, 8, 11], "perform": [1, 3, 4, 5, 6, 8, 9, 11], "signific": 1, "instead": [1, 3, 4, 8, 10, 11], "work": [1, 3, 4, 5, 8, 10, 11], "wai": [1, 11], "around": [1, 10], "maximum": [1, 8, 9, 11], "budg": 1, "close": [1, 10], "than": [1, 4, 5, 8, 9, 10], "budget": [1, 4], "achiev": [1, 3, 4, 5], "get_nprevpoints_approxim": [1, 8], "5000": [1, 5], "4960": 1, "cost": 1, "sometim": 1, "cumbersom": 1, "control": [1, 4, 8], "overal": 1, "experi": [1, 2, 3, 4, 5, 8], "rather": [1, 4], "By": [1, 3, 8], "avoid": [1, 8], "lead": [1, 10], "closer": 1, "surpass": 1, "script": [1, 2, 3, 6, 11], "pacc": [1, 3, 5, 8, 11], "reli": [1, 3, 8, 11], "logist": [1, 3, 9, 11], "regressor": [1, 3], "classifi": [1, 4, 5, 6, 8, 9, 11], "variou": [1, 5], "metric": [1, 3, 4, 6, 8, 11], "sklearn": [1, 3, 4, 5, 6, 9, 10, 11], "linear_model": [1, 3, 4, 6, 9], "logisticregress": [1, 3, 4, 6, 9, 11], "data": [1, 3, 4, 5, 6, 8, 9, 11], "min_df": [1, 3, 4, 5, 10, 11], "inplac": [1, 3, 10, 11], "lr": [1, 3, 9, 11], "aggreg": [1, 4, 5, 6, 8], "fit": [1, 3, 4, 5, 6, 8, 9, 10, 11], "df": 1, "artificial_sampling_report": 1, "mani": [1, 3, 4, 5, 6, 8, 10, 11], "extract": [1, 8, 10], "categori": [1, 8], "n_repetit": [1, 4, 5], "n_job": [1, 3, 4, 8, 9, 10, 11], "parallel": [1, 3, 8, 9, 10, 11], "worker": [1, 8, 9, 10, 11], "cpu": [1, 9, 11], "random_se": [1, 8], "42": 1, "random": [1, 3, 4, 5, 8, 10], "seed": [1, 4, 8], "replic": [1, 4, 8], "error_metr": [1, 4, 8], "line": [1, 3, 8], "result": [1, 2, 3, 4, 5, 6, 11], "report": 1, "panda": [1, 2], "datafram": 1, "displai": [1, 5, 8, 9], "just": [1, 3], "clearer": 1, "shown": [1, 5, 8], "convert": [1, 3, 8, 9, 10, 11], "repres": [1, 3, 5, 8, 10, 11], "decim": 1, "default": [1, 3, 8, 9, 10, 11], "pd": 1, "set_opt": 1, "expand_frame_repr": 1, "fals": [1, 3, 5, 8, 9, 10, 11], "map": [1, 9, 11], "000": 1, "000e": 1, "091": 1, "909": 1, "009": 1, "048": 1, "426e": 1, "04": 1, "837": 1, "037": 1, "114": 1, "633e": 1, "03": 1, "7": [1, 5, 8, 9, 11], "717": 1, "017": 1, "041": 1, "383e": 1, "366": 1, "634": 1, "034": 1, "070": 1, "412e": 1, "459": 1, "541": 1, "387e": 1, "565": 1, "435": 1, "035": 1, "073": 1, "535e": 1, "654": 1, "346": 1, "046": 1, "108": 1, "701e": 1, "725": 1, "275": 1, "075": 1, "235": 1, "515e": 1, "02": 1, "858": 1, "142": 1, "042": 1, "229": 1, "740e": 1, "945": 1, "055": 1, "27": [1, 3, 9], "357": 1, "219e": 1, "578": 1, "dtype": [1, 10], "float64": 1, "artificial_sampling_ev": [1, 4], "artificial_sampling_predict": [1, 5], "arrai": [1, 3, 5, 8, 9, 10, 11], "pip": 2, "older": 2, "version": [2, 8, 9, 11], "scikit": [2, 3, 4, 8, 9, 10, 11], "numpi": [2, 4, 8, 9], "scipi": [2, 10], "pytorch": [2, 11], "quanet": [2, 6, 9, 11], "svmperf": [2, 3, 8, 11], "patch": [2, 3, 9, 11], "joblib": 2, "tqdm": 2, "matplotlib": [2, 8], "involv": [2, 5, 8], "you": [2, 3], "appli": [2, 3, 4, 5, 8, 9, 10, 11], "ext": 2, "compil": [2, 3], "sourc": [2, 3, 6, 9], "prepare_svmperf": [2, 3], "sh": [2, 3], "job": 2, "directori": [2, 8, 9, 10, 11], "svm_perf_quantif": [2, 3], "optim": [2, 3, 4, 8, 9, 11], "measur": [2, 3, 4, 5, 6, 8, 11], "propos": [2, 3, 
11], "barranquero": [2, 3, 9, 11], "extend": [2, 3, 8, 11], "former": [2, 11], "categor": [3, 10], "belong": [3, 11], "non": [3, 11], "group": 3, "though": [3, 8], "plan": 3, "add": [3, 4, 8, 10], "more": [3, 5, 11], "futur": 3, "character": [3, 6], "fact": [3, 5], "product": [3, 10], "quantifi": [3, 4, 5, 6, 8, 10, 11], "shoud": 3, "basequantifi": [3, 8, 11], "abstract": [3, 8, 9, 10, 11], "abstractmethod": 3, "self": [3, 8, 9, 10, 11], "set_param": [3, 8, 9, 11], "get_param": [3, 8, 9, 11], "deep": [3, 8, 11], "familiar": 3, "structur": [3, 11], "inspir": 3, "reason": [3, 5, 6], "why": 3, "ha": [3, 4, 5, 8, 9, 10, 11], "adopt": [3, 4, 10], "respond": 3, "predict": [3, 4, 5, 8, 9, 11], "input": [3, 5, 8, 9, 11], "element": [3, 10, 11], "while": [3, 5, 9, 10, 11], "selector": 3, "process": [3, 4, 8], "hyperparamet": [3, 8, 11], "search": [3, 4, 6, 8, 11], "part": [3, 10], "aggregativequantifi": [3, 11], "must": [3, 10, 11], "fit_learn": 3, "classif_predict": [3, 11], "mention": 3, "befor": [3, 8, 9, 10, 11], "inde": [3, 4], "alreadi": [3, 8, 11], "preclassifi": 3, "maintain": [3, 11], "through": [3, 8], "properti": [3, 8, 9, 10, 11], "learner": [3, 4, 9, 11], "extern": 3, "probabilist": [3, 9, 11], "inherit": 3, "aggregativeprobabilisticquantifi": [3, 11], "posterior": [3, 8, 9, 11], "crisp": [3, 8, 11], "decis": [3, 8, 9, 11], "hard": [3, 9], "classif_posterior": [3, 11], "posterior_prob": [3, 11], "advantag": [3, 11], "procedur": [3, 6, 8], "veri": [3, 5], "effici": 3, "everi": [3, 8, 11], "leverag": 3, "speed": [3, 11], "up": [3, 4, 8, 9, 11], "over": [3, 4, 8], "customarili": [3, 4], "done": 3, "four": 3, "cc": [3, 5, 11], "simplest": 3, "deliv": [3, 11], "adjust": [3, 6, 8, 11], "pcc": [3, 4, 5, 11], "soft": 3, "serv": [3, 8, 10], "complet": [3, 5, 11], "equip": [3, 5], "svm": [3, 5, 6, 9, 10, 11], "linearsvc": [3, 5, 10], "pickl": [3, 8, 10, 11], "alia": [3, 8, 10, 11], "classifyandcount": [3, 11], "estim_preval": [3, 6, 11], "rate": [3, 8, 9, 11], "binari": [3, 5, 6, 8, 9, 10, 11], "init": 3, "addit": 3, "val_split": [3, 4, 9, 11], "integ": [3, 8, 9, 10, 11], "k": [3, 6, 8, 9, 10, 11], "fold": [3, 8, 10, 11], "cross": [3, 8, 9, 10, 11], "specif": [3, 4, 8], "held": [3, 4, 8, 9, 11], "out": [3, 4, 5, 8, 9, 10, 11], "postpon": 3, "constructor": 3, "prevail": 3, "overrid": 3, "illustr": [3, 4, 5], "seem": 3, "calibr": [3, 8], "calibratedclassifiercv": 3, "base_estim": 3, "cv": [3, 4], "predict_proba": [3, 9, 11], "As": [3, 4], "calibratedclassifi": 3, "except": [3, 8, 11], "rais": [3, 8, 11], "lastli": 3, "everyth": 3, "said": 3, "aboud": 3, "sld": [3, 11], "expectationmaximizationquantifi": [3, 11], "describ": [3, 8, 11], "saeren": [3, 11], "m": [3, 8, 11], "latinn": [3, 11], "decaesteck": [3, 11], "c": [3, 4, 8, 9, 10, 11], "2002": 3, "priori": 3, "14": 3, "41": 3, "attempt": [3, 11], "although": [3, 4, 5, 11], "improv": [3, 8, 9, 11], "rank": [3, 9], "almost": 3, "alwai": [3, 4, 5, 11], "among": 3, "effect": 3, "carri": [3, 10, 11], "gonz\u00e1lez": 3, "castro": 3, "v": [3, 8, 9, 11], "alaiz": 3, "rodr\u0131": 3, "guez": 3, "alegr": 3, "2013": 3, "scienc": 3, "218": 3, "146": 3, "It": [3, 4, 5, 8], "allia": 3, "hellingerdistancei": [3, 11], "mixtur": [3, 8, 11], "previou": 3, "overridden": [3, 11], "proport": [3, 4, 9, 10, 11], "taken": [3, 8, 9, 10], "itself": [3, 8, 11], "accept": 3, "elm": [3, 11], "famili": [3, 11], "target": [3, 5, 6, 8, 9, 11], "orient": [3, 6, 8, 11], "joachim": [3, 9, 11], "svmq": [3, 11], "d\u00edez": 3, "reliabl": 3, "pattern": 3, "recognit": 3, "48": 3, 
"591": 3, "604": 3, "svmkld": [3, 11], "multivari": [3, 9], "transact": 3, "discoveri": 3, "articl": [3, 4], "svmnkld": [3, 11], "svmae": [3, 11], "error": [3, 4, 6, 7, 9, 11], "svmrae": [3, 11], "what": 3, "nowadai": 3, "consid": [3, 5, 8, 9, 10, 11], "behav": [3, 5], "If": [3, 5, 8, 10, 11], "want": [3, 4], "custom": [3, 6, 10], "modifi": [3, 8], "assign": [3, 10], "Then": 3, "re": [3, 4, 9, 10], "thing": 3, "your": 3, "svmperf_hom": 3, "valid_loss": [3, 9, 11], "mycustomloss": 3, "28": [3, 10], "current": [3, 8, 9, 10, 11], "support": [3, 6, 9, 10, 11], "oper": 3, "trivial": 3, "strategi": [3, 4], "2016": [3, 10, 11], "sentiment": [3, 6, 10], "19": [3, 10], "onevsal": [3, 11], "know": 3, "where": [3, 5, 8, 9, 10, 11], "top": [3, 8, 11], "thu": [3, 4, 5, 8, 9, 11], "nor": 3, "castano": [3, 10], "2019": [3, 10, 11], "dynam": [3, 9, 10, 11], "task": [3, 4, 10], "45": [3, 5, 10], "15": [3, 8, 10], "polici": [3, 11], "processor": 3, "av": [3, 11], "ptr": [3, 11], "member": [3, 11], "d": [3, 11], "static": [3, 11], "red_siz": [3, 11], "pleas": 3, "check": [3, 4, 8], "offer": [3, 6], "torch": [3, 9, 11], "embed": [3, 9, 11], "lstm": [3, 9, 11], "cnn": [3, 11], "its": [3, 4, 8, 9, 11], "layer": [3, 9, 11], "neuralclassifiertrain": [3, 9, 11], "cnnnet": [3, 9, 11], "vocabulary_s": [3, 9, 10, 11], "cuda": [3, 9, 11], "supervis": [4, 6], "strongli": [4, 5], "good": [4, 5], "choic": [4, 11], "hyper": [4, 8, 9], "wherebi": 4, "chosen": [4, 8], "pick": 4, "best": [4, 8, 9, 11], "being": [4, 8, 11], "criteria": 4, "solv": [4, 11], "assess": 4, "own": 4, "right": [4, 8, 10], "impos": [4, 8], "aim": [4, 5], "appropri": 4, "configur": [4, 8], "design": 4, "long": [4, 9], "regard": 4, "next": [4, 8, 9, 10], "section": 4, "argu": 4, "alejandro": 4, "fabrizio": 4, "count": [4, 5, 6, 8, 10, 11], "arxiv": 4, "preprint": 4, "2011": 4, "02552": 4, "2020": [4, 9], "varieti": 4, "exhibit": [4, 5], "degre": 4, "model_select": [4, 7, 11], "gridsearchq": [4, 8, 11], "grid": [4, 8, 11], "explor": [4, 8], "portion": 4, "param_grid": [4, 8, 11], "logspac": [4, 11], "class_weight": [4, 11], "eval_budget": 4, "refit": [4, 8], "retrain": [4, 9], "goe": 4, "end": [4, 8, 11], "best_params_": 4, "best_model_": 4, "101": 4, "5f": 4, "system": [4, 11], "start": 4, "hyperparam": 4, "0001": [4, 11], "got": [4, 11], "24987": 4, "48135": 4, "001": [4, 9, 11], "24866": 4, "100000": 4, "43676": 4, "finish": 4, "param": [4, 8, 9, 11], "19982": 4, "develop": [4, 6], "1010": 4, "5005": 4, "54it": 4, "20342": 4, "altern": 4, "computation": 4, "costli": 4, "try": 4, "theoret": 4, "suboptim": 4, "opt": 4, "gridsearchcv": [4, 11], "10000": 4, "5379": 4, "55it": 4, "41734": 4, "wors": [4, 5, 8], "larg": 4, "between": [4, 5, 6, 8, 9, 11], "modal": 4, "turn": 4, "better": 4, "nonetheless": 4, "happen": [4, 5], "basic": [5, 11], "help": 5, "analys": [5, 6], "outcom": 5, "main": 5, "method_nam": [5, 8, 11], "name": [5, 8, 9, 10, 11], "shape": [5, 8, 9, 10, 11], "correspond": [5, 10], "matrix": [5, 8, 11], "appear": 5, "occur": [5, 10], "merg": 5, "emq": [5, 11], "55": 5, "showcas": 5, "wide": 5, "variant": [5, 6, 8, 11], "linear": [5, 8, 11], "review": [5, 6, 10], "step": [5, 8], "05": [5, 8, 11], "gen_data": 5, "base_classifi": 5, "yield": [5, 8, 10, 11], "tr_prev": [5, 8, 11], "append": 5, "__class__": 5, "__name__": 5, "insight": 5, "view": 5, "y": [5, 8, 9, 10, 11], "axi": [5, 8], "against": 5, "x": [5, 8, 9, 10, 11], "unfortun": 5, "limit": [5, 8, 11], "binary_diagon": [5, 8], "train_prev": [5, 8], "savepath": [5, 8], "bin_diag": 5, "png": 
5, "save": [5, 8], "pdf": [5, 11], "cyan": 5, "dot": [5, 8], "color": [5, 8], "band": [5, 8], "hidden": [5, 9, 11], "show_std": [5, 8], "unadjust": 5, "bias": 5, "toward": [5, 10], "seen": [5, 8, 11], "evinc": 5, "box": [5, 8], "binary_bias_glob": [5, 8], "bin_bia": 5, "unbias": 5, "center": 5, "tend": 5, "overestim": 5, "high": [5, 8], "lower": [5, 11], "again": 5, "accordingli": 5, "20": [5, 8, 11], "90": [5, 8], "rewrit": 5, "method_data": 5, "training_preval": 5, "linspac": 5, "training_s": 5, "suffic": 5, "latex": 5, "syntax": 5, "_": [5, 8, 10], "now": 5, "clearli": 5, "binary_bias_bin": [5, 8], "broken": [5, 8], "down": [5, 8, 10], "bin": [5, 8, 11], "To": [5, 10], "nbin": [5, 8, 11], "isometr": [5, 8], "subinterv": 5, "interestingli": 5, "enough": 5, "seemingli": 5, "tendenc": 5, "low": [5, 8, 9], "underestim": 5, "beyond": 5, "67": [5, 8], "curios": 5, "pretti": 5, "discuss": 5, "analyz": 5, "compar": [5, 8], "both": 5, "irrespect": [5, 11], "harder": 5, "interpret": [5, 6, 11], "error_by_drift": [5, 8], "error_nam": [5, 8], "n_bin": [5, 8, 11], "err_drift": 5, "whenev": [5, 8], "clear": 5, "lowest": 5, "difficult": 5, "rememb": 5, "solid": 5, "comparison": 5, "detriment": 5, "visual": [5, 6], "hide": 5, "framework": [6, 11], "written": 6, "root": 6, "concept": 6, "baselin": 6, "integr": 6, "commonli": 6, "facilit": 6, "twitter": [6, 10], "true_preval": 6, "hold": [6, 8, 11], "endeavour": [6, 8], "popular": 6, "expect": [6, 11], "maxim": [6, 11], "hdy": [6, 11], "versatil": 6, "etc": 6, "uci": [6, 10], "nativ": 6, "loss": [6, 9, 11], "perf": [6, 9, 11], "ad": 6, "meta": [6, 8], "plot": [6, 7], "diagon": [6, 8], "bia": [6, 8, 9, 11], "drift": 6, "api": 6, "subpackag": 7, "submodul": 7, "util": [7, 9], "content": 7, "bctscalibr": 9, "nbvscalibr": 9, "recalibratedprobabilisticclassifi": 9, "recalibratedprobabilisticclassifierbas": 9, "classes_": [9, 10, 11], "fit_cv": 9, "fit_tr_val": 9, "tscalibr": 9, "vscalibr": 9, "lowranklogisticregress": 9, "document_embed": 9, "lstmnet": 9, "reset_net_param": 9, "textclassifiernet": 9, "dimens": [8, 9, 10, 11], "forward": [9, 11], "xavier_uniform": 9, "torchdataset": 9, "asdataload": 9, "decision_funct": 9, "splitstratifi": 10, "stat": 10, "train_test": 10, "xp": 10, "xy": 10, "split_random": 10, "split_stratifi": 10, "uniform_sampl": 10, "uniform_sampling_index": 10, "fetch_lequa2022": 10, "warn": 10, "indextransform": 10, "add_word": 10, "fit_transform": 10, "reader": 8, "binar": [8, 10], "from_csv": 10, "from_spars": 10, "from_text": 10, "reindex_label": 10, "getptecondestim": 11, "solve_adjust": 11, "adjustedclassifyandcount": 11, "distributionmatch": 11, "dy": 11, "em": 11, "max_it": 11, "explicitlossminimis": 11, "max": 11, "ms2": 11, "mediansweep": 11, "mediansweep2": 11, "probabilisticadjustedclassifyandcount": 11, "probabilisticclassifyandcount": 11, "smm": 11, "t50": 11, "thresholdoptim": 11, "cross_generate_predict": 11, "cross_generate_predictions_depr": 11, "binaryquantifi": 11, "onevsallgener": 11, "eacc": 11, "ecc": 11, "eemq": 11, "ehdi": 11, "epacc": 11, "valid_polici": 11, "ensemblefactori": 11, "get_probability_distribut": 11, "quanetmodul": 11, "quanettrain": 11, "clean_checkpoint": 11, "clean_checkpoint_dir": 11, "mae_loss": 11, "non_aggreg": 8, "maximumlikelihoodprevalenceestim": 11, "absolute_error": 8, "hat": 8, "frac": 8, "mathcal": 8, "sum_": 8, "acc_error": 8, "y_true": 8, "y_pred": 8, "tp": 8, "tn": 8, "fp": 8, "fn": 8, "stand": [8, 11], "f1_error": 8, "macro": 8, "f_1": 8, "harmon": 8, "recal": 8, "2tp": 8, 
"independ": [8, 11], "err_nam": 8, "p_hat": 8, "d_": 8, "kl": 8, "log": [8, 10], "factor": 8, "beforehand": 8, "n_sampl": [8, 9], "mean_absolute_error": 8, "mean_relative_absolute_error": 8, "relative_absolute_error": 8, "underlin": 8, "displaystyl": 8, "abstractprotocol": 8, "union": [8, 11], "aggr_speedup": 8, "auto": 8, "evaluation_report": 8, "app": [8, 11], "repeat": 8, "smooth_limits_epsilon": 8, "random_st": [8, 10], "return_typ": 8, "sample_prev": 8, "abstractstochasticseededprotocol": 8, "onlabelledcollectionprotocol": 8, "95": 8, "copi": [8, 10], "quantiti": 8, "labelled_collect": 8, "prevalence_grid": 8, "exhaust": 8, "sum": [8, 11], "implicit": 8, "return_constrained_dim": 8, "rest": [8, 9, 10, 11], "quit": 8, "obvious": 8, "determinist": 8, "anywher": 8, "multipli": 8, "necessari": 8, "samples_paramet": 8, "total": 8, "parent": 8, "sequenc": 8, "enforc": 8, "collat": 8, "arg": [8, 10, 11], "domainmix": 8, "domaina": 8, "domainb": 8, "mixture_point": 8, "domain": 8, "scale": [8, 9, 11], "npp": 8, "draw": 8, "uniformli": 8, "therefor": 8, "get_col": 8, "get_labelled_collect": 8, "on_preclassified_inst": 8, "pre_classif": 8, "in_plac": 8, "usimplexpp": 8, "kraemer": 8, "algorithm": [8, 11], "sens": 8, "guarante": [8, 10], "prefer": 8, "intract": 8, "hellingerdist": 8, "hellingh": 8, "distanc": [8, 11], "hd": [8, 11], "discret": [8, 11], "sqrt": 8, "p_i": 8, "q_i": 8, "real": [8, 9, 10, 11], "topsoedist": 8, "1e": [8, 9, 11], "topso": [8, 11], "adjusted_quantif": 8, "prevalence_estim": 8, "tpr": [8, 11], "fpr": [8, 11], "clip": 8, "exce": 8, "check_prevalence_vector": 8, "raise_except": 8, "toleranz": 8, "08": 8, "combinations_budget": 8, "largest": 8, "dimension": [8, 9, 10, 11], "repetit": 8, "less": [8, 10], "normalize_preval": 8, "l1": [8, 11], "calcul": 8, "binom": 8, "mass": 8, "alloc": [8, 9], "solut": 8, "star": 8, "bar": 8, "prevalence_from_label": 8, "n_instanc": [8, 9, 11], "correctli": 8, "even": 8, "len": 8, "prevalence_from_prob": 8, "bool": [8, 9, 11], "argmax": 8, "prevalence_linspac": 8, "01": [8, 9, 11], "separ": [8, 10], "99": 8, "uniform_prevalence_sampl": 8, "adapt": [8, 9], "post": 8, "http": [8, 10, 11], "stackexchang": 8, "com": 8, "question": 8, "3227": 8, "uniform": [8, 10], "uniform_simplex_sampl": 8, "dict": [8, 10, 11], "timeout": 8, "dictionari": [8, 9, 10, 11], "kei": [8, 10], "quantification_error": 8, "whether": [8, 9, 10, 11], "ignor": [8, 10, 11], "gen": 8, "establish": 8, "timer": 8, "longer": 8, "timeouterror": 8, "bound": [8, 11], "stdout": 8, "best_model": 8, "after": [8, 11], "minim": [8, 11], "routin": [8, 10, 11], "unus": [8, 9], "contanin": 8, "cross_val_predict": 8, "akin": [8, 11], "issu": 8, "reproduc": [8, 10], "pos_class": [8, 10], "titl": 8, "colormap": 8, "listedcolormap": 8, "vertical_xtick": 8, "legend": 8, "local": 8, "sign": 8, "minu": 8, "classs": 8, "compon": [8, 9, 11], "cm": 8, "tab10": 8, "secondari": 8, "global": 8, "method_ord": 8, "henc": [8, 10], "conveni": 8, "multiclass": [8, 10, 11], "inconveni": 8, "leyend": 8, "hightlight": 8, "associ": [8, 10], "brokenbar_supremacy_by_drift": 8, "isomer": 8, "x_error": 8, "y_error": 8, "ttest_alpha": 8, "005": 8, "tail_density_threshold": 8, "region": 8, "chart": 8, "condit": [8, 11], "ii": 8, "significantli": 8, "side": 8, "confid": 8, "percentil": 8, "divid": 8, "amount": 8, "similar": [8, 11], "threshold": [8, 11], "densiti": 8, "tail": 8, "discard": 8, "outlier": 8, "show_dens": 8, "show_legend": 8, "logscal": 8, "vline": 8, "especi": 8, "mai": 8, "cumberson": 8, "gain": 8, 
"understand": 8, "fare": 8, "regim": 8, "highlight": 8, "vertic": 8, "earlystop": 8, "patienc": [8, 9, 11], "lower_is_bett": 8, "earli": [8, 9, 11], "stop": [8, 9, 11], "epoch": [8, 9, 11], "best_epoch": 8, "best_scor": 8, "consecut": [8, 9, 11], "monitor": 8, "obtaind": 8, "far": [8, 9, 10], "flag": 8, "keep": 8, "track": 8, "boolean": [8, 10, 11], "create_if_not_exist": 8, "makedir": 8, "exist_ok": 8, "join": 8, "dir": 8, "subdir": 8, "anotherdir": 8, "create_parent_dir": 8, "exist": 8, "txt": 8, "download_fil": 8, "url": 8, "archive_filenam": 8, "destin": 8, "filenam": 8, "download_file_if_not_exist": 8, "dowload": 8, "get_quapy_hom": 8, "home": [8, 10], "perman": 8, "map_parallel": 8, "func": 8, "slice": 8, "item": 8, "wrapper": [8, 9, 10, 11], "multiprocess": 8, "delai": 8, "args_i": 8, "silent": [8, 11], "child": 8, "ensur": 8, "pickled_resourc": 8, "pickle_path": 8, "generation_func": 8, "fast": [8, 10], "resourc": 8, "some_arrai": 8, "mock": [8, 9], "rand": 8, "my_arrai": 8, "pkl": 8, "save_text_fil": 8, "disk": 8, "miss": 8, "temp_se": 8, "context": 8, "tempor": 8, "outer": 8, "state": 8, "within": [8, 11], "get_njob": [], "correct": [9, 11], "temperatur": [9, 11], "bct": [9, 11], "abstent": 9, "alexandari": [9, 11], "afterward": [9, 11], "No": [9, 11], "nbv": [9, 11], "baseestim": [9, 11], "calibratorfactori": 9, "n_compon": 9, "kwarg": [9, 10, 11], "decomposit": 9, "truncatedsvd": 9, "princip": 9, "regress": 9, "n_featur": 9, "length": [9, 10], "eventu": [9, 10], "unalt": 9, "emb": 9, "embedding_s": 9, "hidden_s": 9, "repr_siz": 9, "kernel_height": 9, "stride": 9, "pad": [9, 10], "drop_p": 9, "convolut": 9, "vocabulari": [9, 10], "kernel": 9, "drop": 9, "dropout": [9, 11], "batch": 9, "dataload": 9, "tensor": 9, "n_dimens": 9, "lstm_class_nlay": 9, "short": 9, "memori": 9, "net": 9, "weight_decai": 9, "batch_siz": 9, "64": [9, 11], "batch_size_test": 9, "512": [9, 11], "padding_length": 9, "checkpointpath": 9, "checkpoint": [9, 11], "classifier_net": 9, "weight": [9, 10], "decai": 9, "wait": 9, "enabl": 9, "gpu": [9, 11], "vocab_s": 9, "reiniti": 9, "trainer": 9, "disjoint": 9, "embed_s": 9, "nn": 9, "pad_length": 9, "xavier": 9, "shuffl": [9, 10], "longest": 9, "shorter": 9, "svmperf_bas": [9, 11], "classifiermixin": 9, "thorsten": 9, "refer": [9, 10], "svm_perf_learn": 9, "svm_perf_classifi": 9, "trade": 9, "off": 9, "margin": 9, "std": 9, "qacc": 9, "qf1": 9, "qgm": 9, "12": 9, "26": 9, "23": 9, "train_siz": 10, "conform": 10, "round": 10, "loader_kwarg": 10, "read": 10, "tupl": [10, 11], "tr": 10, "te": 10, "csr": 10, "csr_matrix": 10, "4403": 10, "my_collect": 10, "codefram": 10, "larger": [10, 11], "actual": [10, 11], "empti": 10, "met": 10, "whose": [10, 11], "train_prop": 10, "left": [8, 10], "stratif": 10, "greater": 10, "dataset_nam": 10, "data_hom": 10, "test_split": 10, "predefin": 10, "uci_dataset": 10, "dump": 10, "leav": 10, "quay_data": 10, "ml": 10, "5fcvx2": 10, "x2": 10, "offici": 10, "lequa": 10, "competit": 10, "t1a": 10, "t1b": 10, "t2a": 10, "t2b": 10, "raw": 10, "merchandis": 10, "sperduti": 10, "2022": 10, "overview": 10, "clef": 10, "lequa2022_experi": 10, "py": 10, "guid": 10, "val_gen": 10, "test_gen": 10, "samplesfromdir": 10, "minimun": 10, "kept": 10, "subsequ": 10, "mining6": 10, "devel": 10, "style": 10, "countvector": 10, "keyword": [10, 11], "nogap": 10, "regardless": 10, "codifi": 10, "unknown": 10, "surfac": 10, "assert": 10, "gap": 10, "preced": 10, "decid": 10, "uniqu": 10, "rare": 10, "unk": 10, "minimum": [10, 11], "occurr": 10, "org": 
[10, 11], "stabl": 10, "feature_extract": 10, "html": 10, "subtyp": 10, "spmatrix": 10, "remov": [10, 11], "infrequ": 10, "aka": [10, 11], "sublinear_tf": 10, "scall": 10, "counter": 10, "tfidfvector": 10, "whcih": 10, "had": 10, "encod": 10, "utf": 10, "csv": 10, "feat1": 10, "feat2": 10, "featn": 10, "covari": 10, "express": 10, "row": 10, "class2int": 10, "collet": 10, "fomart": 10, "progress": 10, "sentenc": 10, "classnam": 10, "u1": 10, "misclassif": 11, "n_classes_": [], "fit_classifi": 11, "bypass": 11, "y_": 11, "ptecondestim": 11, "prevs_estim": 11, "ax": 11, "entri": 11, "y_i": 11, "y_j": 11, "_posterior_probabilities_": 11, "attribut": 11, "subclass": 11, "give": 11, "outsid": 11, "unless": 11, "noth": 11, "els": 11, "cdf": 11, "match": 11, "helling": 11, "sought": 11, "channel": 11, "proper": 11, "ch": 11, "di": 11, "dij": 11, "fraction": 11, "th": 11, "tol": 11, "ternari": 11, "dl": 11, "doi": 11, "1145": 11, "3219819": 11, "3220059": 11, "histogram": 11, "toler": 11, "explicit": 11, "exact_train_prev": 11, "recalib": 11, "updat": 11, "likelihood": [9, 11], "mutual": 11, "recurs": 11, "until": 11, "converg": 11, "suggest": 11, "recalibr": 11, "reach": 11, "loop": 11, "cumul": 11, "unlabel": 11, "latter": 11, "forman": 11, "2006": 11, "2008": 11, "goal": 11, "bring": 11, "denomin": 11, "median": 11, "sweep": 11, "binary_quantifi": 11, "prevel": 11, "emploi": 11, "resp": 11, "subobject": 11, "nest": 11, "pipelin": 11, "__": 11, "simplif": 11, "2021": 11, "equival": 11, "cosest": 11, "heurist": 11, "choos": 11, "ground": 11, "complement": 11, "param_mod_sel": 11, "param_model_sel": 11, "min_po": 11, "max_sample_s": 11, "closest": 11, "preliminari": 11, "recomput": 11, "compat": 11, "l": 11, "base_quantifier_class": 11, "factori": 11, "common": 11, "doc_embedding_s": 11, "stats_siz": 11, "lstm_hidden_s": 11, "lstm_nlayer": 11, "ff_layer": 11, "1024": 11, "bidirect": 11, "qdrop_p": 11, "order_bi": 11, "cell": 11, "connect": 11, "ff": 11, "sort": 11, "doc_embed": 11, "doc_posterior": 11, "recip": 11, "care": 11, "regist": 11, "hook": 11, "n_epoch": 11, "tr_iter_per_poch": 11, "va_iter_per_poch": 11, "checkpointdir": 11, "checkpointnam": 11, "phase": 11, "anyth": 11, "truth": 11, "mlpe": 11, "lazi": 11, "put": 11, "assumpion": 11, "beat": [9, 11], "estimant": 11, "kundaj": 9, "shrikumar": 9, "novemb": 9, "232": 9, "pmlr": 9, "outpu": 9, "partit": 9, "ight": [], "valueerror": 8}, "objects": {"": [[8, 0, 0, "-", "quapy"]], "quapy": [[9, 0, 0, "-", "classification"], [10, 0, 0, "-", "data"], [8, 0, 0, "-", "error"], [8, 0, 0, "-", "evaluation"], [8, 0, 0, "-", "functional"], [11, 0, 0, "-", "method"], [8, 0, 0, "-", "model_selection"], [8, 0, 0, "-", "plot"], [8, 0, 0, "-", "protocol"], [8, 0, 0, "-", "util"]], "quapy.classification": [[9, 0, 0, "-", "calibration"], [9, 0, 0, "-", "methods"], [9, 0, 0, "-", "neural"], [9, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[9, 1, 1, "", "BCTSCalibration"], [9, 1, 1, "", "NBVSCalibration"], [9, 1, 1, "", "RecalibratedProbabilisticClassifier"], [9, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [9, 1, 1, "", "TSCalibration"], [9, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[9, 2, 1, "", "classes_"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "fit_cv"], [9, 3, 1, "", "fit_tr_val"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[9, 1, 1, "", "LowRankLogisticRegression"]], 
"quapy.classification.methods.LowRankLogisticRegression": [[9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural": [[9, 1, 1, "", "CNNnet"], [9, 1, 1, "", "LSTMnet"], [9, 1, 1, "", "NeuralClassifierTrainer"], [9, 1, 1, "", "TextClassifierNet"], [9, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[9, 2, 1, "", "device"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "reset_net_params"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[9, 3, 1, "", "dimensions"], [9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "forward"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict_proba"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"], [9, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[9, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[9, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[9, 3, 1, "", "decision_function"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "set_params"], [9, 4, 1, "", "valid_losses"]], "quapy.data": [[10, 0, 0, "-", "base"], [10, 0, 0, "-", "datasets"], [10, 0, 0, "-", "preprocessing"], [10, 0, 0, "-", "reader"]], "quapy.data.base": [[10, 1, 1, "", "Dataset"], [10, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[10, 3, 1, "", "SplitStratified"], [10, 2, 1, "", "binary"], [10, 2, 1, "", "classes_"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 3, 1, "", "stats"], [10, 2, 1, "", "train_test"], [10, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[10, 2, 1, "", "X"], [10, 2, 1, "", "Xp"], [10, 2, 1, "", "Xy"], [10, 2, 1, "", "binary"], [10, 3, 1, "", "counts"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 2, 1, "", "p"], [10, 3, 1, "", "prevalence"], [10, 3, 1, "", "sampling"], [10, 3, 1, "", "sampling_from_index"], [10, 3, 1, "", "sampling_index"], [10, 3, 1, "", "split_random"], [10, 3, 1, "", "split_stratified"], [10, 3, 1, "", "stats"], [10, 3, 1, "", "uniform_sampling"], [10, 3, 1, "", "uniform_sampling_index"], [10, 2, 1, "", "y"]], "quapy.data.datasets": [[10, 5, 1, "", "fetch_UCIDataset"], [10, 5, 1, "", "fetch_UCILabelledCollection"], [10, 5, 1, "", "fetch_lequa2022"], [10, 5, 1, "", "fetch_reviews"], [10, 5, 1, "", "fetch_twitter"], [10, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[10, 1, 1, "", "IndexTransformer"], [10, 5, 1, "", "index"], [10, 5, 1, "", "reduce_columns"], [10, 5, 1, "", "standardize"], [10, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[10, 3, 1, "", "add_word"], [10, 3, 1, "", "fit"], [10, 3, 1, "", "fit_transform"], [10, 3, 1, "", "transform"], [10, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[10, 5, 1, "", "binarize"], [10, 5, 1, "", "from_csv"], [10, 5, 1, "", "from_sparse"], [10, 5, 1, "", "from_text"], [10, 5, 1, "", "reindex_labels"]], "quapy.error": [[8, 5, 1, "", 
"absolute_error"], [8, 5, 1, "", "acc_error"], [8, 5, 1, "", "acce"], [8, 5, 1, "", "ae"], [8, 5, 1, "", "f1_error"], [8, 5, 1, "", "f1e"], [8, 5, 1, "", "from_name"], [8, 5, 1, "", "kld"], [8, 5, 1, "", "mae"], [8, 5, 1, "", "mean_absolute_error"], [8, 5, 1, "", "mean_relative_absolute_error"], [8, 5, 1, "", "mkld"], [8, 5, 1, "", "mnkld"], [8, 5, 1, "", "mrae"], [8, 5, 1, "", "mse"], [8, 5, 1, "", "nkld"], [8, 5, 1, "", "rae"], [8, 5, 1, "", "relative_absolute_error"], [8, 5, 1, "", "se"], [8, 5, 1, "", "smooth"]], "quapy.evaluation": [[8, 5, 1, "", "evaluate"], [8, 5, 1, "", "evaluation_report"], [8, 5, 1, "", "prediction"]], "quapy.functional": [[8, 5, 1, "", "HellingerDistance"], [8, 5, 1, "", "TopsoeDistance"], [8, 5, 1, "", "adjusted_quantification"], [8, 5, 1, "", "check_prevalence_vector"], [8, 5, 1, "", "get_nprevpoints_approximation"], [8, 5, 1, "", "normalize_prevalence"], [8, 5, 1, "", "num_prevalence_combinations"], [8, 5, 1, "", "prevalence_from_labels"], [8, 5, 1, "", "prevalence_from_probabilities"], [8, 5, 1, "", "prevalence_linspace"], [8, 5, 1, "", "strprev"], [8, 5, 1, "", "uniform_prevalence_sampling"], [8, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[11, 0, 0, "-", "aggregative"], [11, 0, 0, "-", "base"], [11, 0, 0, "-", "meta"], [11, 0, 0, "-", "neural"], [11, 0, 0, "-", "non_aggregative"]], "quapy.method.aggregative": [[11, 1, 1, "", "ACC"], [11, 4, 1, "", "AdjustedClassifyAndCount"], [11, 1, 1, "", "AggregativeProbabilisticQuantifier"], [11, 1, 1, "", "AggregativeQuantifier"], [11, 1, 1, "", "CC"], [11, 4, 1, "", "ClassifyAndCount"], [11, 1, 1, "", "DistributionMatching"], [11, 1, 1, "", "DyS"], [11, 1, 1, "", "ELM"], [11, 1, 1, "", "EMQ"], [11, 4, 1, "", "ExpectationMaximizationQuantifier"], [11, 4, 1, "", "ExplicitLossMinimisation"], [11, 1, 1, "", "HDy"], [11, 4, 1, "", "HellingerDistanceY"], [11, 1, 1, "", "MAX"], [11, 1, 1, "", "MS"], [11, 1, 1, "", "MS2"], [11, 4, 1, "", "MedianSweep"], [11, 4, 1, "", "MedianSweep2"], [11, 1, 1, "", "OneVsAll"], [11, 1, 1, "", "PACC"], [11, 1, 1, "", "PCC"], [11, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [11, 4, 1, "", "ProbabilisticClassifyAndCount"], [11, 4, 1, "", "SLD"], [11, 1, 1, "", "SMM"], [11, 1, 1, "", "SVMAE"], [11, 1, 1, "", "SVMKLD"], [11, 1, 1, "", "SVMNKLD"], [11, 1, 1, "", "SVMQ"], [11, 1, 1, "", "SVMRAE"], [11, 1, 1, "", "T50"], [11, 1, 1, "", "ThresholdOptimization"], [11, 1, 1, "", "X"], [11, 5, 1, "", "cross_generate_predictions"], [11, 5, 1, "", "cross_generate_predictions_depr"]], "quapy.method.aggregative.ACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"], [11, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeProbabilisticQuantifier": [[11, 3, 1, "", "classify"]], "quapy.method.aggregative.AggregativeQuantifier": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 2, 1, "", "classifier"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.aggregative.CC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DistributionMatching": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DyS": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ELM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.EMQ": [[11, 3, 1, "", "EM"], [11, 4, 1, "", "EPSILON"], [11, 4, 1, "", "MAX_ITER"], [11, 3, 1, "", 
"aggregate"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.OneVsAll": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "set_params"]], "quapy.method.aggregative.PACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.SMM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ThresholdOptimization": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.base": [[11, 1, 1, "", "BaseQuantifier"], [11, 1, 1, "", "BinaryQuantifier"], [11, 1, 1, "", "OneVsAllGeneric"]], "quapy.method.base.BaseQuantifier": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[11, 2, 1, "", "classes"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.meta": [[11, 5, 1, "", "EACC"], [11, 5, 1, "", "ECC"], [11, 5, 1, "", "EEMQ"], [11, 5, 1, "", "EHDy"], [11, 5, 1, "", "EPACC"], [11, 1, 1, "", "Ensemble"], [11, 5, 1, "", "ensembleFactory"], [11, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[11, 4, 1, "", "VALID_POLICIES"], [11, 2, 1, "", "aggregative"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 2, 1, "", "probabilistic"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.neural": [[11, 1, 1, "", "QuaNetModule"], [11, 1, 1, "", "QuaNetTrainer"], [11, 5, 1, "", "mae_loss"]], "quapy.method.neural.QuaNetModule": [[11, 2, 1, "", "device"], [11, 3, 1, "", "forward"], [11, 4, 1, "", "training"]], "quapy.method.neural.QuaNetTrainer": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "clean_checkpoint"], [11, 3, 1, "", "clean_checkpoint_dir"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[11, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.model_selection": [[8, 1, 1, "", "GridSearchQ"], [8, 5, 1, "", "cross_val_predict"]], "quapy.model_selection.GridSearchQ": [[8, 3, 1, "", "best_model"], [8, 3, 1, "", "fit"], [8, 3, 1, "", "get_params"], [8, 3, 1, "", "quantify"], [8, 3, 1, "", "set_params"]], "quapy.plot": [[8, 5, 1, "", "binary_bias_bins"], [8, 5, 1, "", "binary_bias_global"], [8, 5, 1, "", "binary_diagonal"], [8, 5, 1, "", "brokenbar_supremacy_by_drift"], [8, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[8, 1, 1, "", "APP"], [8, 1, 1, "", "AbstractProtocol"], [8, 1, 1, "", "AbstractStochasticSeededProtocol"], [8, 1, 1, "", "DomainMixer"], [8, 1, 1, "", "NPP"], [8, 1, 1, "", "OnLabelledCollectionProtocol"], [8, 1, 1, "", "USimplexPP"]], "quapy.protocol.APP": [[8, 3, 1, "", "prevalence_grid"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[8, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[8, 3, 1, "", "collator"], [8, 2, 1, "", "random_state"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[8, 3, 1, "", "sample"], [8, 3, 1, 
"", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.NPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[8, 4, 1, "", "RETURN_TYPES"], [8, 3, 1, "", "get_collator"], [8, 3, 1, "", "get_labelled_collection"], [8, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.USimplexPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.util": [[8, 1, 1, "", "EarlyStop"], [8, 5, 1, "", "create_if_not_exist"], [8, 5, 1, "", "create_parent_dir"], [8, 5, 1, "", "download_file"], [8, 5, 1, "", "download_file_if_not_exists"], [8, 5, 1, "", "get_quapy_home"], [8, 5, 1, "", "map_parallel"], [8, 5, 1, "", "parallel"], [8, 5, 1, "", "pickled_resource"], [8, 5, 1, "", "save_text_file"], [8, 5, 1, "", "temp_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"dataset": [0, 10], "review": 0, "twitter": 0, "sentiment": 0, "uci": 0, "machin": 0, "learn": 0, "issu": 0, "ad": 0, "custom": 0, "data": [0, 10], "process": 0, "evalu": [1, 8], "error": [1, 5, 8], "measur": 1, "protocol": [1, 8], "instal": 2, "requir": 2, "svm": 2, "perf": 2, "quantif": [2, 3, 4, 5], "orient": [2, 4], "loss": [2, 3, 4], "method": [3, 9, 11], "aggreg": [3, 11], "The": 3, "classifi": 3, "count": 3, "variant": 3, "expect": 3, "maxim": 3, "emq": 3, "helling": 3, "distanc": 3, "y": 3, "hdy": 3, "explicit": 3, "minim": 3, "meta": [3, 11], "model": [3, 4], "ensembl": 3, "quanet": 3, "neural": [3, 9, 11], "network": 3, "select": 4, "target": 4, "classif": [4, 9], "plot": [5, 8], "diagon": 5, "bia": 5, "drift": 5, "welcom": 6, "quapi": [6, 7, 8, 9, 10, 11], "": 6, "document": 6, "introduct": 6, "A": 6, "quick": 6, "exampl": 6, "featur": 6, "content": [6, 8, 9, 10, 11], "indic": 6, "tabl": 6, "packag": [8, 9, 10, 11], "subpackag": 8, "submodul": [8, 9, 10, 11], "function": 8, "model_select": 8, "util": 8, "modul": [8, 9, 10, 11], "calibr": 9, "svmperf": 9, "base": [10, 11], "preprocess": 10, "reader": 10, "non_aggreg": 11}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Datasets": [[0, "datasets"]], "Reviews Datasets": [[0, "reviews-datasets"]], "Twitter Sentiment Datasets": [[0, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[0, "uci-machine-learning"]], "Issues:": [[0, "issues"]], "Adding Custom Datasets": [[0, "adding-custom-datasets"]], "Data Processing": [[0, "data-processing"]], "Evaluation": [[1, "evaluation"]], "Error Measures": [[1, "error-measures"]], "Evaluation Protocols": [[1, "evaluation-protocols"]], "Installation": [[2, "installation"]], "Requirements": [[2, "requirements"]], "SVM-perf with quantification-oriented losses": [[2, "svm-perf-with-quantification-oriented-losses"]], "Quantification Methods": [[3, "quantification-methods"]], "Aggregative Methods": [[3, "aggregative-methods"]], "The Classify & Count variants": [[3, 
"the-classify-count-variants"]], "Expectation Maximization (EMQ)": [[3, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[3, "hellinger-distance-y-hdy"]], "Explicit Loss Minimization": [[3, "explicit-loss-minimization"]], "Meta Models": [[3, "meta-models"]], "Ensembles": [[3, "ensembles"]], "The QuaNet neural network": [[3, "the-quanet-neural-network"]], "Model Selection": [[4, "model-selection"]], "Targeting a Quantification-oriented loss": [[4, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[4, "targeting-a-classification-oriented-loss"]], "Plotting": [[5, "plotting"]], "Diagonal Plot": [[5, "diagonal-plot"]], "Quantification bias": [[5, "quantification-bias"]], "Error by Drift": [[5, "error-by-drift"]], "Welcome to QuaPy\u2019s documentation!": [[6, "welcome-to-quapy-s-documentation"]], "Introduction": [[6, "introduction"]], "A quick example:": [[6, "a-quick-example"]], "Features": [[6, "features"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "quapy": [[7, "quapy"]], "Submodules": [[9, "submodules"], [8, "submodules"], [10, "submodules"], [11, "submodules"]], "Module contents": [[9, "module-quapy.classification"], [8, "module-quapy"], [10, "module-quapy.data"], [11, "module-quapy.method"]], "quapy.classification package": [[9, "quapy-classification-package"]], "quapy.classification.calibration": [[9, "quapy-classification-calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "quapy package": [[8, "quapy-package"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.protocol": [[8, "quapy-protocol"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.util": [[8, "module-quapy.util"]], "Subpackages": [[8, "subpackages"]], "quapy.data package": [[10, "quapy-data-package"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "quapy.method package": [[11, "quapy-method-package"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[8, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol"]], "domainmixer (class in quapy.protocol)": [[8, "quapy.protocol.DomainMixer"]], "earlystop (class in quapy.util)": [[8, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[8, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[8, "quapy.functional.HellingerDistance"]], "npp (class in quapy.protocol)": [[8, "quapy.protocol.NPP"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[8, 
"quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "topsoedistance() (in module quapy.functional)": [[8, "quapy.functional.TopsoeDistance"]], "usimplexpp (class in quapy.protocol)": [[8, "quapy.protocol.USimplexPP"]], "absolute_error() (in module quapy.error)": [[8, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[8, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[8, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[8, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[8, "quapy.error.ae"]], "best_model() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[8, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[8, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[8, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in 
module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], 
"samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], "fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, 
"quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], 
"adjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "elm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ELM"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "explicitlossminimisation (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExplicitLossMinimisation"]], "hdy (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAll"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, 
"quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "svmae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMAE"]], "svmkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMKLD"]], "svmnkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMNKLD"]], "svmq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMQ"]], "svmrae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMRAE"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.aggregative.onevsall property)": [[11, "quapy.method.aggregative.OneVsAll.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() 
(quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.classify"]], "classify() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], 
"get_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.get_params"]], "get_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.set_params"]], "set_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file diff --git a/examples/custom_quantifier.py b/examples/custom_quantifier.py new file mode 100644 index 0000000..a025b87 --- /dev/null +++ b/examples/custom_quantifier.py @@ -0,0 +1,69 @@ +import quapy as qp +from data import LabelledCollection +from method.base import BaseQuantifier, BinaryQuantifier +from model_selection import GridSearchQ +from quapy.method.aggregative import PACC, AggregativeProbabilisticQuantifier +from quapy.protocol import APP +import numpy as np +from sklearn.linear_model import LogisticRegression + + +# Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a +# logistic regressor for generating posterior probabilities, and then applies a custom threshold value to the +# posteriors. 
Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it +# relies on posterior probabilities, it is a probabilistic aggregative quantifier. Note also that it has an +# internal hyperparameter (let's say, alpha) which is the decision threshold. Let's also assume the quantifier +# is binary, for simplicity. + +class MyQuantifier(AggregativeProbabilisticQuantifier, BinaryQuantifier): + def __init__(self, classifier, alpha=0.5): + self.alpha = alpha + # aggregative quantifiers have an internal self.classifier attribute + self.classifier = classifier + + def fit(self, data: LabelledCollection, fit_classifier=True): + assert fit_classifier, 'this quantifier needs to fit the classifier!' + self.classifier.fit(*data.Xy) + return self + + # in general, we would need to implement the method quantify(self, instances) but, since this method is of + # type aggregative, we can simply implement the method aggregate, which has the following interface + def aggregate(self, classif_predictions: np.ndarray): + # the posterior probabilities have already been generated by the quantify method; we only need to + # specify what to do with them + positive_probabilities = classif_predictions[:, 1] + crisp_decisions = positive_probabilities > self.alpha + pos_prev = crisp_decisions.mean() + neg_prev = 1-pos_prev + return np.asarray([neg_prev, pos_prev]) + + +if __name__ == '__main__': + + qp.environ['SAMPLE_SIZE'] = 100 + + # define an instance of our custom quantifier + quantifier = MyQuantifier(LogisticRegression(), alpha=0.5) + + # load the IMDb dataset + train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test + train, val = train.split_stratified(train_prop=0.75) + + # model selection + # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier + param_grid = { + 'alpha': np.linspace(0,1,11), # quantifier-dependent hyperparameter + 'classifier__C': np.logspace(-2,2,5) # classifier-dependent hyperparameter + } + quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train) + + # evaluation + mae = qp.evaluation.evaluate(quantifier, protocol=APP(test), error_metric='mae') + + print(f'MAE = {mae:.4f}') + + # final remarks: this method is only for demonstration purposes and makes little sense in general. The method relies + # on a hyperparameter alpha for binarizing the posterior probabilities. A much better way of fulfilling this + # goal would be to calibrate the classifier (LogisticRegression is already reasonably well calibrated) and then + # simply cut at 0.5. + diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index f2deea0..3fb21f6 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -1,11 +1,19 @@ -# main changes in 0.1.7 +Change Log 0.1.7 +--------------------- -- Protocols are now abstracted as AbstractProtocol. There is a new class extending AbstractProtocol called +- Protocols are now abstracted as instances of AbstractProtocol. There is a new class extending AbstractProtocol called AbstractStochasticSeededProtocol, which implements a seeding policy to allow replicating the series of samplings. - There are some examples of protocols, APP, NPP, USimplexPP, CovariateShiftPP (experimental). + There are some examples of protocols, APP, NPP, USimplexPP, DomainMixer (experimental). The idea is to start the sampling by simply calling the __call__ method. 
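For illustration, a minimal sketch of the new protocol interface (the random_state argument and the default behaviour of yielding (instances, prevalence) pairs are assumptions based on the examples accompanying this patch):

    import quapy as qp
    from quapy.protocol import APP

    qp.environ['SAMPLE_SIZE'] = 100                # sample size picked up by the protocol
    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test
    prot = APP(test, random_state=0)               # seeded, so the series of samples is replicable
    for sample_instances, true_prev in prot():     # sampling starts by calling the protocol
        pass                                       # e.g., quantify sample_instances and compare with true_prev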
This change has a great impact on the framework, since many functions in qp.evaluation, qp.model_selection, - and sampling functions in LabelledCollection make use of the old functions. + and sampling functions in LabelledCollection relied on the old functions. E.g., the functionality of + qp.evaluation.artificial_prevalence_report or qp.evaluation.natural_prevalence_report is now obtained by means of + qp.evaluation.report, which takes a protocol as an argument. I have not maintained compatibility with the old + interfaces because I did not really like them. Check the wiki guide and the examples for more details. + + check guides + + check examples - ACC, PACC, Forman's threshold variants have been parallelized. @@ -51,47 +59,31 @@ multiclass quantification. That is to say, one could get a multiclass variant of the (originally binary) HDy method aligned with Firat's formulation. +- internal method properties "binary", "aggregative", and "probabilistic" have been removed; these conditions are + checked via isinstance + +- quantifiers (i.e., classes that inherit from BaseQuantifier) are not forced to implement classes_ or n_classes; + these can be used anyway internally, but the framework will not assume (nor impose) that a quantifier implements + them + +- qp.evaluation.prediction has been optimized so that, if a quantifier is of type aggregative, and if the evaluation + protocol is of type OnLabelledCollection, then the computation is faster. In this specific case, the predictions + are issued only once and for all, and not for each sample. An exception to this (which is also implemented) is + when the number of instances across all samples is anyway smaller than the number of instances in the original + labelled collection; in this case the heuristic is of no help, and is therefore not applied. + +- the distinction between "classify" and "posterior_probabilities" has been removed in Aggregative quantifiers, + so that probabilistic classifiers return posterior probabilities, while non-probabilistic quantifiers + return crisp decisions. + Things to fix: -- calibration with recalibration methods has to be fixed for exact_train_prev in EMQ (conflicts with clone, deepcopy, etc.) -- clean functions like binary, aggregative, probabilistic, etc; those should be resolved via isinstance(): - this is not working; I don't know how to make the isinstance work. Looks like there is some problem with the - path of the imported class wrt the path of the class that arrives from another module... -- clean classes_ and n_classes from methods (maybe not from aggregative ones, but those have to be used only - internally and not imposed in any abstract class) -- optimize "qp.evaluation.prediction" for aggregative methods (pre-classification) +-------------- +- OneVsAll is duplicated (in aggregative and in general), and is not well documented. It is not working either. + Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll - update unit tests -- Policies should be able to set their output to "labelled_collection" or "instances_prevalence" or something similar. -- Policies should implement the "gen()" one, taking a reader function as an input, and a folder path maybe -- Review all documentation, redo the Sphinx doc, update Wikis... +- update Wikis... - Resolve the OneVsAll thing (it is in base.py and in aggregative.py) -- Better handle the environment (e.g., with n_jobs) -- test cross_generate_predictions and cancel cross_generate_predictions_depr - Add a proper log? 
-- test LoadSamplesFromDirectory (in protocols.py) -- improve plots? -- I have removed the distinction between "classify" and "posterior_probabilities" in the Aggregative quantifiers, - so that probabilistic classifiers actually return posterior probabilities, while non-probabilistic quantifiers - return instead crisp decisions. The idea was to unify the quantification function (i.e., now it is always - classify & aggregate, irrespective of the class). However, this has caused a problem with OneVsAll. This has to - be checked, since it is now innecessarily complicated (it also has old references to .probabilistic, and all this - stuff). -- Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll +- improve plots +- documentation of protocols is incomplete -New features: -- Add LeQua2022 to datasets (everything automatic, and with proper protocols "gen") -- Add an "experimental room", with scripts to quickly test new ideas and see results. - -# 0.1.7 -# change the LabelledCollection API (removing protocol-related samplings) -# need to change the two references to the above in the wiki / doc, and code examples... -# removed artificial_prevalence_sampling from functional - -# also: some parameters in the init could be used to indicate that the method should return a tuple with -# unlabelled instances and the vector of prevalence values (and not a LabelledCollection). -# Or: this can be done in a different function; i.e., we use one function (now __call__) to return -# LabelledCollections, and another new one for returning the other output, which is more general for -# evaluation purposes. - -# the so-called "gen" function has to be implemented as a protocol. The problem here is that this function -# should be able to return only unlabelled instances plus a vector of prevalences (and not LabelledCollections). -# This was coded as different functions in 0.1.6 diff --git a/quapy/__init__.py b/quapy/__init__.py index 54b1603..47a7388 100644 --- a/quapy/__init__.py +++ b/quapy/__init__.py @@ -23,9 +23,28 @@ environ = { } -def get_njobs(n_jobs): +def _get_njobs(n_jobs): + """ + If `n_jobs` is None, then it returns `environ['N_JOBS']`; otherwise, it returns `n_jobs`. + + :param n_jobs: the number of parallel jobs, or None if not specified + :return: int + """ return environ['N_JOBS'] if n_jobs is None else n_jobs +def _get_sample_size(sample_size): + """ + If `sample_size` is None, then it returns `environ['SAMPLE_SIZE']`; otherwise, it returns `sample_size`. + If neither is set, a ValueError exception is raised. + + :param sample_size: integer or None + :return: int + """ + sample_size = environ['SAMPLE_SIZE'] if sample_size is None else sample_size + if sample_size is None: + raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified') + return sample_size + diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index 69a7e14..f35bb97 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -12,12 +12,18 @@ import numpy as np class RecalibratedProbabilisticClassifier: + """ + Abstract class for (re)calibration methods from `abstention.calibration`, as defined in + `Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration + is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR. 
+ `_: + """ pass class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabilisticClassifier): """ - Applies a (re)calibration method from abstention.calibration, as defined in + Applies a (re)calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier @@ -25,7 +31,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None :param verbose: whether or not to display information in the standard output """ @@ -38,6 +44,13 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi self.verbose = verbose def fit(self, X, y): + """ + Fits the calibration for the probabilistic classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ k = self.val_split if isinstance(k, int): if k < 2: @@ -49,6 +62,15 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self.fit_cv(X, y) def fit_cv(self, X, y): + """ + Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all + training instances via cross-validation, and then retrains the classifier on all training instances. + The posterior probabilities thus generated are used for calibrating the outputs of the classifier. + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ posteriors = cross_val_predict( self.classifier, X, y, cv=self.val_split, n_jobs=self.n_jobs, verbose=self.verbose, method='predict_proba' ) @@ -58,6 +80,16 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self def fit_tr_val(self, X, y): + """ + Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a + training and a validation set, and then uses the training samples to learn a classifier, which is then used + to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate + the classifier. The classifier is not retrained on the whole dataset. 
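A minimal usage sketch of these wrappers (BCTSCalibration, defined later in this patch, is used here; the toy data is purely illustrative):

    import numpy as np
    from sklearn.linear_model import LogisticRegression
    from quapy.classification.calibration import BCTSCalibration

    X = np.random.randn(200, 5)                               # toy data, for illustration only
    y = (X[:, 0] > 0).astype(int)
    cal = BCTSCalibration(LogisticRegression(), val_split=5)  # posteriors via 5-fold CV, then retrain on all data
    cal.fit(X, y)
    posteriors = cal.predict_proba(X)                         # calibrated posterior probabilities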
+ + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :param y: array-like of shape `(n_samples,)` with the class labels + :return: self + """ Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=self.val_split, stratify=y) self.classifier.fit(Xtr, ytr) posteriors = self.classifier.predict_proba(Xva) @@ -66,32 +98,49 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi return self def predict(self, X): + """ + Predicts class labels for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples,)` with the class label predictions + """ return self.classifier.predict(X) def predict_proba(self, X): + """ + Generates posterior probabilities for the data instances in `X` + + :param X: array-like of shape `(n_samples, n_features)` with the data instances + :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities + """ posteriors = self.classifier.predict_proba(X) return self.calibration_function(posteriors) @property def classes_(self): + """ + Returns the classes on which the classifier has been trained + + :return: array-like of shape `(n_classes)` + """ return self.classifier.classes_ class NBVSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the No-Bias Vector Scaling (NBVS) calibration method from abstention.calibration, as defined in + Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = NoBiasVectorScaling(verbose=verbose) self.val_split = val_split @@ -101,19 +150,19 @@ class NBVSCalibration(RecalibratedProbabilisticClassifierBase): class BCTSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from abstention.calibration, as defined in + Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = TempScaling(verbose=verbose, bias_positions='all') self.val_split = val_split @@ -123,19 +172,19 @@ class BCTSCalibration(RecalibratedProbabilisticClassifierBase): class TSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Temperature Scaling (TS) calibration method from abstention.calibration, as defined in + Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. :param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = TempScaling(verbose=verbose) self.val_split = val_split @@ -145,19 +194,19 @@ class TSCalibration(RecalibratedProbabilisticClassifierBase): class VSCalibration(RecalibratedProbabilisticClassifierBase): """ - Applies the Vector Scaling (VS) calibration method from abstention.calibration, as defined in + Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in `Alexandari et al. paper `_: :param classifier: a scikit-learn probabilistic classifier :param val_split: indicate an integer k for performing kFCV to obtain the posterior prevalences, or a float p in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the training instances (the rest is used for training). In any case, the classifier is retrained in the whole - training set afterwards. + training set afterwards. Default value is 5. 
:param n_jobs: indicate the number of parallel workers (only when val_split is an integer) :param verbose: whether or not to display information in the standard output """ - def __init__(self, classifier, val_split=5, n_jobs=1, verbose=False): + def __init__(self, classifier, val_split=5, n_jobs=None, verbose=False): self.classifier = classifier self.calibrator = VectorScaling(verbose=verbose) self.val_split = val_split diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py index 2f6ad90..176b102 100644 --- a/quapy/classification/svmperf.py +++ b/quapy/classification/svmperf.py @@ -94,6 +94,7 @@ class SVMperf(BaseEstimator, ClassifierMixin): def predict(self, X): """ Predicts labels for the instances `X` + :param X: array-like of shape `(n_samples, n_features)` instances to classify :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of instances in `X` diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index b35343b..241cd04 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -554,7 +554,31 @@ def _df_replace(df, col, repl={'yes': 1, 'no':0}, astype=float): def fetch_lequa2022(task, data_home=None): """ + Loads the official datasets provided for the `LeQua `_ competition. + In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification + problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead. + Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification + problems consisting of estimating the class prevalence values of 28 different merchandise products. + We refer the reader to `Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022). + A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify. + `_ for a detailed description + of the tasks and datasets. + + The datasets are downloaded only once, and stored for fast reuse. + + See `lequa2022_experiments.py` provided in the example folder, which can serve as a guide on how to use these + datasets. + + + :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B + :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default + ~/quapy_data/ directory) + :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of + :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of + :class:`quapy.protocol.SamplesFromDir`, i.e., are sampling protocols that return a series of samples + labelled by prevalence. 
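A minimal usage sketch consistent with the return signature described above (the choice of PACC and of the error metric is illustrative; qp.evaluation.evaluate is used as in the examples accompanying this patch):

    import quapy as qp
    from quapy.method.aggregative import PACC
    from sklearn.linear_model import LogisticRegression

    train, val_gen, test_gen = qp.datasets.fetch_lequa2022(task='T1A')   # downloaded once, then cached
    quantifier = PACC(LogisticRegression()).fit(train)
    mae = qp.evaluation.evaluate(quantifier, protocol=test_gen, error_metric='mae')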
""" + from quapy.data._lequa2022 import load_raw_documents, load_vector_documents, SamplesFromDir assert task in LEQUA2022_TASKS, \ diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py index a987900..e65ccf7 100644 --- a/quapy/data/preprocessing.py +++ b/quapy/data/preprocessing.py @@ -88,7 +88,7 @@ def standardize(dataset: Dataset, inplace=False): :param dataset: a :class:`quapy.data.base.Dataset` object :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new :class:`quapy.data.base.Dataset` is to be returned - :return: + :return: an instance of :class:`quapy.data.base.Dataset` """ s = StandardScaler(copy=not inplace) training = s.fit_transform(dataset.training.instances) @@ -110,7 +110,7 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs): :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default) :param kwargs: the rest of parameters of the transformation (as for sklearn's - `CountVectorizer _`) + `CountVectorizer _`) :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer values representing indices. """ @@ -147,7 +147,8 @@ class IndexTransformer: contains, and that would be generated by sklearn's `CountVectorizer `_ - :param kwargs: keyworded arguments from `CountVectorizer `_ + :param kwargs: keyworded arguments from + `CountVectorizer `_ """ def __init__(self, **kwargs): @@ -179,7 +180,7 @@ class IndexTransformer: """ # given the number of tasks and the number of jobs, generates the slices for the parallel processes assert self.unk != -1, 'transform called before fit' - n_jobs = qp.get_njobs(n_jobs) + n_jobs = qp._get_njobs(n_jobs) indexed = map_parallel(func=self._index, args=X, n_jobs=n_jobs) return np.asarray(indexed) diff --git a/quapy/depr_evaluation.py b/quapy/depr_evaluation.py deleted file mode 100644 index 0846ab0..0000000 --- a/quapy/depr_evaluation.py +++ /dev/null @@ -1,439 +0,0 @@ -from typing import Union, Callable, Iterable -import numpy as np -from tqdm import tqdm -import inspect - -import quapy as qp -from quapy.data import LabelledCollection -from quapy.method.base import BaseQuantifier -from quapy.util import temp_seed -import quapy.functional as F -import pandas as pd - - -def artificial_prevalence_prediction( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - verbose=False): - """ - Performs the predictions for all samples generated according to the Artificial Prevalence Protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. 
- - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(n_prevpoints*repeats)` and `n` the number of classes. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - - n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, repeats, verbose) - - with temp_seed(random_seed): - indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, repeats)) - - return _predict_from_indexes(indexes, model, test, n_jobs, verbose) - - -def natural_prevalence_prediction( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats, - n_jobs=1, - random_seed=42, - verbose=False): - """ - Performs the predictions for all samples generated according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(repeats)` and `n` the number of classes. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - - with temp_seed(random_seed): - indexes = list(test.natural_sampling_index_generator(sample_size, repeats)) - - return _predict_from_indexes(indexes, model, test, n_jobs, verbose) - - -def gen_prevalence_prediction(model: BaseQuantifier, gen_fn: Callable, eval_budget=None): - """ - Generates prevalence predictions for a custom protocol defined as a generator function that yields - samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None` - or up to the `eval_budget` iterations if specified. 
- - :param model: the model in charge of generating the class prevalence estimations - :param gen_fn: a generator function yielding one sample at each iteration - :param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the - entire sequence - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - generated and `n` the number of classes. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - if not inspect.isgenerator(gen_fn()): - raise ValueError('param "gen_fun" is not a callable returning a generator') - - if not isinstance(eval_budget, int): - eval_budget = -1 - - true_prevalences, estim_prevalences = [], [] - for sample_instances, true_prev in gen_fn(): - true_prevalences.append(true_prev) - estim_prevalences.append(model.quantify(sample_instances)) - eval_budget -= 1 - if eval_budget == 0: - break - - true_prevalences = np.asarray(true_prevalences) - estim_prevalences = np.asarray(estim_prevalences) - - return true_prevalences, estim_prevalences - - -def _predict_from_indexes( - indexes, - model: BaseQuantifier, - test: LabelledCollection, - n_jobs=1, - verbose=False): - - if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier): - # print('\tinstance of aggregative-quantifier') - quantification_func = model.aggregate - if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier): - # print('\t\tinstance of probabilitstic-aggregative-quantifier') - preclassified_instances = model.posterior_probabilities(test.instances) - else: - # print('\t\tinstance of hard-aggregative-quantifier') - preclassified_instances = model.classify(test.instances) - test = LabelledCollection(preclassified_instances, test.labels) - else: - # print('\t\tinstance of base-quantifier') - quantification_func = model.quantify - - def _predict_prevalences(index): - sample = test.sampling_from_index(index) - true_prevalence = sample.prevalence() - estim_prevalence = quantification_func(sample.instances) - return true_prevalence, estim_prevalence - - pbar = tqdm(indexes, desc='[artificial sampling protocol] generating predictions') if verbose else indexes - results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs) - - true_prevalences, estim_prevalences = zip(*results) - true_prevalences = np.asarray(true_prevalences) - estim_prevalences = np.asarray(estim_prevalences) - - return true_prevalences, estim_prevalences - - -def artificial_prevalence_report( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - error_metrics:Iterable[Union[str,Callable]]='mae', - verbose=False): - """ - Generates an evaluation report for all samples generated according to the Artificial Prevalence Protocol (APP). - The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. 
- Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function; optionally, a list of strings or callables can be indicated, if the results - are to be evaluated with more than one error metric. Default is "mae" - :param verbose: if True, shows a progress bar - :return: pandas' dataframe with rows corresponding to different samples, and with columns informing of the - true prevalence values, the estimated prevalence values, and the score obtained by each of the evaluation - measures indicated. - """ - - true_prevs, estim_prevs = artificial_prevalence_prediction( - model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose - ) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def natural_prevalence_report( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats=1, - n_jobs=1, - random_seed=42, - error_metrics:Iterable[Union[str,Callable]]='mae', - verbose=False): - """ - Generates an evaluation report for all samples generated according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. - Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. 
The seed is local to the method and does not affect - any other random process (default 42) - :param error_metrics: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function; optionally, a list of strings or callables can be indicated, if the results - are to be evaluated with more than one error metric. Default is "mae" - :param verbose: if True, shows a progress bar - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - `(repeats)` and `n` the number of classes. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - - """ - - true_prevs, estim_prevs = natural_prevalence_prediction( - model, test, sample_size, repeats, n_jobs, random_seed, verbose - ) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def gen_prevalence_report(model: BaseQuantifier, gen_fn: Callable, eval_budget=None, - error_metrics:Iterable[Union[str,Callable]]='mae'): - """ - GGenerates an evaluation report for a custom protocol defined as a generator function that yields - samples at each iteration. The sequence of samples is processed exhaustively if `eval_budget=None` - or up to the `eval_budget` iterations if specified. - Te report takes the form of a - pandas' `dataframe `_ - in which the rows correspond to different samples, and the columns inform of the true prevalence values, - the estimated prevalence values, and the score obtained by each of the evaluation measures indicated. - - :param model: the model in charge of generating the class prevalence estimations - :param gen_fn: a generator function yielding one sample at each iteration - :param eval_budget: a maximum number of evaluations to run. Set to None (default) for exploring the - entire sequence - :return: a tuple containing two `np.ndarrays` of shape `(m,n,)` with `m` the number of samples - generated. The first one contains the true prevalence values - for the samples generated while the second one contains the prevalence estimations - """ - true_prevs, estim_prevs = gen_prevalence_prediction(model, gen_fn, eval_budget) - return _prevalence_report(true_prevs, estim_prevs, error_metrics) - - -def _prevalence_report( - true_prevs, - estim_prevs, - error_metrics: Iterable[Union[str, Callable]] = 'mae'): - - if isinstance(error_metrics, str): - error_metrics = [error_metrics] - - error_names = [e if isinstance(e, str) else e.__name__ for e in error_metrics] - error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics] - assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions' - - df = pd.DataFrame(columns=['true-prev', 'estim-prev'] + error_names) - for true_prev, estim_prev in zip(true_prevs, estim_prevs): - series = {'true-prev': true_prev, 'estim-prev': estim_prev} - for error_name, error_metric in zip(error_names, error_funcs): - score = error_metric(true_prev, estim_prev) - series[error_name] = score - df = df.append(series, ignore_index=True) - - return df - - -def artificial_prevalence_protocol( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - n_prevpoints=101, - repeats=1, - eval_budget: int = None, - n_jobs=1, - random_seed=42, - error_metric:Union[str,Callable]='mae', - verbose=False): - """ - Generates samples according to the Artificial Prevalence Protocol (APP). 
- The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g., - [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of - prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ..., - [1, 0, 0] prevalence values of size `sample_size` will be considered). The number of samples for each valid - combination of prevalence values is indicated by `repeats`. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform APP - :param sample_size: integer, the size of the samples - :param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget - is specified; default 101, i.e., steps of 1%) - :param repeats: integer, the number of repetitions for each prevalence (default 1) - :param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if - there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this - will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and - since setting `n_prevpoints=6` would produce more than 20 evaluations. - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: integer, allows to replicate the samplings. The seed is local to the method and does not affect - any other random process (default 42) - :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a - callable error function - :param verbose: set to True (default False) for displaying some information on standard output - :return: yields one sample at a time - """ - - if isinstance(error_metric, str): - error_metric = qp.error.from_name(error_metric) - - assert hasattr(error_metric, '__call__'), 'invalid error function' - - true_prevs, estim_prevs = artificial_prevalence_prediction( - model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose - ) - - return error_metric(true_prevs, estim_prevs) - - -def natural_prevalence_protocol( - model: BaseQuantifier, - test: LabelledCollection, - sample_size, - repeats=1, - n_jobs=1, - random_seed=42, - error_metric:Union[str,Callable]='mae', - verbose=False): - """ - Generates samples according to the Natural Prevalence Protocol (NPP). - The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural - prevalence of the collection. - - :param model: the model in charge of generating the class prevalence estimations - :param test: the test set on which to perform NPP - :param sample_size: integer, the size of the samples - :param repeats: integer, the number of samples to generate - :param n_jobs: integer, number of jobs to be run in parallel (default 1) - :param random_seed: allows to replicate the samplings. 
The seed is local to the method and does not affect
-        any other random process (default 42)
-    :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
-        callable error function
-    :param verbose: if True, shows a progress bar
-    :return: the score obtained by evaluating `error_metric` on the true and estimated prevalence values of all
-        the generated samples
-    """
-
-    if isinstance(error_metric, str):
-        error_metric = qp.error.from_name(error_metric)
-
-    assert hasattr(error_metric, '__call__'), 'invalid error function'
-
-    true_prevs, estim_prevs = natural_prevalence_prediction(
-        model, test, sample_size, repeats, n_jobs, random_seed, verbose
-    )
-
-    return error_metric(true_prevs, estim_prevs)
-
-
-def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], error_metric:Union[str, Callable], n_jobs:int=-1):
-    """
-    Evaluates a model on a sequence of test samples in terms of a given error metric.
-
-    :param model: the model in charge of generating the class prevalence estimations
-    :param test_samples: an iterable yielding one sample at a time
-    :param error_metric: a string indicating the name of the error (as defined in :mod:`quapy.error`) or a
-        callable error function
-    :param n_jobs: integer, number of jobs to be run in parallel (default -1, meaning all available cores)
-    :return: the score obtained using `error_metric`
-    """
-    if isinstance(error_metric, str):
-        error_metric = qp.error.from_name(error_metric)
-    scores = qp.util.parallel(_delayed_eval, ((model, Ti, error_metric) for Ti in test_samples), n_jobs=n_jobs)
-    return np.mean(scores)
-
-
-def _delayed_eval(args):
-    model, test, error = args
-    prev_estim = model.quantify(test.instances)
-    prev_true = test.prevalence()
-    return error(prev_true, prev_estim)
-
-
-def _check_num_evals(n_classes, n_prevpoints=None, eval_budget=None, repeats=1, verbose=False):
-    if n_prevpoints is None and eval_budget is None:
-        raise ValueError('either n_prevpoints or eval_budget has to be specified')
-    elif n_prevpoints is None:
-        assert eval_budget > 0, 'eval_budget must be a positive integer'
-        n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if verbose:
-            print(f'setting n_prevpoints={n_prevpoints} so that the number of '
-                  f'evaluations ({eval_computations}) does not exceed the evaluation '
-                  f'budget ({eval_budget})')
-    elif eval_budget is None:
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if verbose:
-            print(f'{eval_computations} evaluations will be performed for each '
-                  f'combination of hyper-parameters')
-    else:
-        eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-        if eval_computations > eval_budget:
-            n_prevpoints = F.get_nprevpoints_approximation(eval_budget, n_classes, repeats)
-            new_eval_computations = F.num_prevalence_combinations(n_prevpoints, n_classes, repeats)
-            if verbose:
-                print(f'the evaluation budget would be exceeded with the requested n_prevpoints. '
-                      f'Changing to n_prevpoints={n_prevpoints}.
This will produce '
-                      f'{new_eval_computations} evaluation computations for each hyper-parameter combination.')
-    return n_prevpoints, eval_computations
-
diff --git a/quapy/error.py b/quapy/error.py
index 2047929..c0cd157 100644
--- a/quapy/error.py
+++ b/quapy/error.py
@@ -11,11 +11,6 @@ def from_name(err_name):
     """
     assert err_name in ERROR_NAMES, f'unknown error {err_name}'
     callable_error = globals()[err_name]
-    # if err_name in QUANTIFICATION_ERROR_SMOOTH_NAMES:
-    #     eps = __check_eps()
-    #     def bound_callable_error(y_true, y_pred):
-    #         return callable_error(y_true, y_pred, eps)
-    #     return bound_callable_error
     return callable_error
 
 
diff --git a/quapy/functional.py b/quapy/functional.py
index 3ee46ff..a1f0ba2 100644
--- a/quapy/functional.py
+++ b/quapy/functional.py
@@ -70,7 +70,7 @@ def HellingerDistance(P, Q):
     The HD for two discrete distributions of `k` bins is defined as:
 
     .. math::
-        HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }
+        HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }
 
     :param P: real-valued array-like of shape `(k,)` representing a discrete distribution
     :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
@@ -78,11 +78,21 @@ def HellingerDistance(P, Q):
     """
     return np.sqrt(np.sum((np.sqrt(P) - np.sqrt(Q))**2))
 
+
 def TopsoeDistance(P, Q, epsilon=1e-20):
-    """ Topsoe """
-    return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) +
-                  Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
+    """
+    Topsoe distance between two (discretized) distributions `P` and `Q`.
+    The Topsoe distance for two discrete distributions of `k` bins is defined as:
+
+    .. math::
+        Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) +
+            q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right)
+
+    :param P: real-valued array-like of shape `(k,)` representing a discrete distribution
+    :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution
+    :param epsilon: small smoothing constant added to numerator and denominator to avoid division by zero (default 1e-20)
+    :return: float
+    """
+    return np.sum(P*np.log((2*P+epsilon)/(P+Q+epsilon)) + Q*np.log((2*Q+epsilon)/(P+Q+epsilon)))
 
 
 def uniform_prevalence_sampling(n_classes, size=1):
@@ -136,7 +146,6 @@ def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
 
     .. math::
         ACC(p) = \\frac{ p - fpr }{ tpr - fpr }
-
     :param prevalence_estim: float, the estimated value for the positive class
     :param tpr: float, the true positive rate of the classifier
     :param fpr: float, the false positive rate of the classifier
@@ -184,7 +193,7 @@ def __num_prevalence_combinations_depr(n_prevpoints:int, n_classes:int, n_repeat
     :param n_prevpoints: integer, number of prevalence points.
     :param n_repeats: integer, number of repetitions for each prevalence combination
    :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
-        number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
+        number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
     """
     __cache={}
     def __f(nc,np):
@@ -216,7 +225,7 @@ def num_prevalence_combinations(n_prevpoints:int, n_classes:int, n_repeats:int=1
     :param n_prevpoints: integer, number of prevalence points.
     :param n_repeats: integer, number of repetitions for each prevalence combination
     :return: The number of possible combinations.
For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the
-        number of possible combinations are 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
+        number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]
     """
     N = n_prevpoints-1
     C = n_classes
@@ -230,7 +239,7 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe
     that the number of valid prevalence values generated as combinations of prevalence points (points in a
     `n_classes`-dimensional simplex) do not exceed combinations_budget.
 
-    :param combinations_budget: integer, maximum number of combinatios allowed
+    :param combinations_budget: integer, maximum number of combinations allowed
     :param n_classes: integer, number of classes
     :param n_repeats: integer, number of repetitions for each prevalence combination
     :return: the largest number of prevalence points that generate less than combinations_budget valid prevalences
@@ -248,6 +257,7 @@ def get_nprevpoints_approximation(combinations_budget:int, n_classes:int, n_repe
 def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08):
     """
     Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.
+
     :param p: the prevalence vector to check
     :return: True if `p` is valid, False otherwise
     """
@@ -265,3 +275,4 @@ def check_prevalence_vector(p, raise_exception=False, toleranze=1e-08):
             raise ValueError('the prevalence vector does not sum up to 1')
         return False
     return True
+
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 87b682e..a9a93cb 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -76,7 +76,7 @@ class AggregativeQuantifier(BaseQuantifier):
         by the classifier.
 
         :param instances: array-like
-        :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates.
+        :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.
         """
         classif_predictions = self.classify(instances)
         return self.aggregate(classif_predictions)
@@ -87,7 +87,7 @@ class AggregativeQuantifier(BaseQuantifier):
         Implements the aggregation of label predictions.
 
         :param classif_predictions: `np.ndarray` of label predictions
-        :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates.
+        :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.
         """
         ...
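For illustration (an editorial sketch, not part of the patch): the aggregative pattern documented in the hunk above amounts to quantify(X) = aggregate(classify(X)). A minimal classify-and-count style aggregation, assuming only numpy:

>>> import numpy as np
>>> def cc_aggregate(predicted_labels, classes):
...     # prevalence estimate = normalized counts of each predicted label
...     counts = np.array([(predicted_labels == c).sum() for c in classes], dtype=float)
...     return counts / counts.sum()
>>> cc_aggregate(np.array([0, 1, 1, 2, 1, 0]), classes=[0, 1, 2])
array([0.33333333, 0.5       , 0.16666667])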
@@ -113,19 +113,6 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier): def classify(self, instances): return self.classifier.predict_proba(instances) - # def set_params(self, **parameters): - # if isinstance(self.classifier, CalibratedClassifierCV): - # if self.classifier.get_params().get('base_estimator') == 'deprecated': - # key_prefix = 'estimator__' # this has changed in the newer versions of sklearn - # else: - # key_prefix = 'base_estimator__' - # parameters = {key_prefix + k: v for k, v in parameters.items()} - # elif isinstance(self.classifier, RecalibratedClassifier): - # parameters = {'estimator__' + k: v for k, v in parameters.items()} - # - # self.classifier.set_params(**parameters) - # return self - # Helper # ------------------------------------ @@ -198,7 +185,7 @@ def cross_generate_predictions( n_jobs ): - n_jobs = qp.get_njobs(n_jobs) + n_jobs = qp._get_njobs(n_jobs) if isinstance(val_split, int): assert fit_classifier == True, \ @@ -305,7 +292,7 @@ class CC(AggregativeQuantifier): Computes class prevalence estimates by counting the prevalence of each of the predicted labels. :param classif_predictions: array-like with label predictions - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. """ return F.prevalence_from_labels(classif_predictions, self.classes_) @@ -328,7 +315,7 @@ class ACC(AggregativeQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -435,7 +422,7 @@ class PACC(AggregativeProbabilisticQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): """ @@ -660,6 +647,20 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) +def _get_divergence(divergence: Union[str, Callable]): + if isinstance(divergence, str): + if divergence=='HD': + return F.HellingerDistance + elif divergence=='topsoe': + return F.TopsoeDistance + else: + raise ValueError(f'unknown divergence {divergence}') + elif callable(divergence): + return divergence + else: + raise ValueError(f'argument "divergence" not understood; use a str or a callable function') + + class DyS(AggregativeProbabilisticQuantifier, BinaryQuantifier): """ `DyS framework `_ (DyS). @@ -765,25 +766,13 @@ class SMM(AggregativeProbabilisticQuantifier, BinaryQuantifier): return np.asarray([1 - class1_prev, class1_prev]) -def _get_divergence(divergence: Union[str, Callable]): - if isinstance(divergence, str): - if divergence=='HD': - return F.HellingerDistance - elif divergence=='topsoe': - return F.TopsoeDistance - else: - raise ValueError(f'unknown divergence {divergence}') - elif callable(divergence): - return divergence - else: - raise ValueError(f'argument "divergence" not understood; use a str or a callable function') class DistributionMatching(AggregativeProbabilisticQuantifier): """ Generic Distribution Matching quantifier for binary or multiclass quantification. 
This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters. - :param classifier: a sklearn's Estimator that generates a probabilistic classifier + :param classifier: a `sklearn`'s Estimator that generates a probabilistic classifier :param val_split: indicates the proportion of data to be used as a stratified held-out validation set to model the validation distribution. This parameter can be indicated as a real value (between 0 and 1, default 0.4), representing a proportion of @@ -799,7 +788,6 @@ class DistributionMatching(AggregativeProbabilisticQuantifier): """ def __init__(self, classifier, val_split=0.4, nbins=8, divergence: Union[str, Callable]='HD', cdf=False, n_jobs=None): - self.classifier = classifier self.val_split = val_split self.nbins = nbins @@ -1020,7 +1008,7 @@ class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None): self.classifier = classifier self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, int, LabelledCollection] = None): self._check_binary(data, "Threshold Optimization") @@ -1277,7 +1265,7 @@ class OneVsAll(AggregativeQuantifier): assert isinstance(self.binary_quantifier, AggregativeQuantifier), \ f'{self.binary_quantifier} does not seem to be of type Aggregative' self.binary_quantifier = binary_quantifier - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, fit_classifier=True): assert not data.binary, \ diff --git a/quapy/method/base.py b/quapy/method/base.py index 459130c..a80f7b7 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -32,29 +32,10 @@ class BaseQuantifier(BaseEstimator): Generate class prevalence estimates for the sample's instances :param instances: array-like - :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates. + :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates. """ ... - # @abstractmethod - # def set_params(self, **parameters): - # """ - # Set the parameters of the quantifier. - # - # :param parameters: dictionary of param-value pairs - # """ - # ... - # - # @abstractmethod - # def get_params(self, deep=True): - # """ - # Return the current parameters of the quantifier. - # - # :param deep: for compatibility with sklearn - # :return: a dictionary of param-value pairs - # """ - # ... - class BinaryQuantifier(BaseQuantifier): """ @@ -77,7 +58,7 @@ class OneVsAllGeneric: assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' self.binary_quantifier = binary_quantifier - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) def fit(self, data: LabelledCollection, **kwargs): assert not data.binary, \ diff --git a/quapy/method/meta.py b/quapy/method/meta.py index 82d3a35..ba682ee 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -84,7 +84,7 @@ class Ensemble(BaseQuantifier): self.red_size = red_size self.policy = policy self.val_split = val_split - self.n_jobs = qp.get_njobs(n_jobs) + self.n_jobs = qp._get_njobs(n_jobs) self.post_proba_fn = None self.verbose = verbose self.max_sample_size = max_sample_size @@ -147,7 +147,7 @@ class Ensemble(BaseQuantifier): with the abstract class). 
Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or
         `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for
-        classification (not recommended).
+        classification (not recommended).
 
         :param parameters: dictionary
         :return: raises an Exception
@@ -163,10 +163,12 @@ class Ensemble(BaseQuantifier):
         with the abstract class). Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier
         (recommended), or `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier
         `l` optimized for
-        classification (not recommended).
+        classification (not recommended).
+
         :param deep: for compatibility with scikit-learn
         :return: raises an Exception
         """
+
         raise NotImplementedError()
 
     def _accuracy_policy(self, error_name):
diff --git a/quapy/method/non_aggregative.py b/quapy/method/non_aggregative.py
index f70a0c6..0a8680d 100644
--- a/quapy/method/non_aggregative.py
+++ b/quapy/method/non_aggregative.py
@@ -21,7 +21,6 @@ class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
         :param data: the training sample
         :return: self
         """
-        self._classes_ = data.classes_
         self.estimated_prevalence = data.prevalence()
         return self
@@ -34,29 +33,3 @@ class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
         """
         return self.estimated_prevalence
 
-    @property
-    def classes_(self):
-        """
-        Number of classes
-
-        :return: integer
-        """
-
-        return self._classes_
-
-    def get_params(self, deep=True):
-        """
-        Does nothing, since this learner has no parameters.
-
-        :param deep: for compatibility with sklearn
-        :return: `None`
-        """
-        return None
-
-    def set_params(self, **parameters):
-        """
-        Does nothing, since this learner has no parameters.
-
-        :param parameters: dictionary of param-value pairs (ignored)
-        """
-        pass
diff --git a/quapy/model_selection.py b/quapy/model_selection.py
index 3cb22c7..b8b9282 100644
--- a/quapy/model_selection.py
+++ b/quapy/model_selection.py
@@ -49,7 +49,7 @@ class GridSearchQ(BaseQuantifier):
         self.protocol = protocol
         self.refit = refit
         self.timeout = timeout
-        self.n_jobs = qp.get_njobs(n_jobs)
+        self.n_jobs = qp._get_njobs(n_jobs)
         self.verbose = verbose
         self.__check_error(error)
         assert isinstance(protocol, AbstractProtocol), 'unknown protocol'
diff --git a/quapy/protocol.py b/quapy/protocol.py
index b30165f..1dec78b 100644
--- a/quapy/protocol.py
+++ b/quapy/protocol.py
@@ -11,13 +11,17 @@ from glob import glob
 
 class AbstractProtocol(metaclass=ABCMeta):
+    """
+    Abstract parent class for sample generation protocols.
+    """
 
     @abstractmethod
     def __call__(self):
         """
-        Implements the protocol. Yields one sample at a time
+        Implements the protocol. Yields one sample at a time along with its prevalence
 
-        :return: yields one sample at a time
+        :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances
+            and `prev` is an `np.ndarray` with the class prevalence values
         """
         ...
 
@@ -32,9 +36,10 @@ class AbstractProtocol(metaclass=ABCMeta):
 
 class AbstractStochasticSeededProtocol(AbstractProtocol):
     """
-    An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g.,
-    via random sapling), sequences of `LabelledCollection` samples. The protocol abstraction enforces
-    the object to be instantiated using a seed, so that the sequence can be completely replicated.
+    An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,
+    via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples.
+    The protocol abstraction requires
+    the object to be instantiated with a seed, so that the sequence can be fully replicated.
     In order to make this functionality possible, the classes extending this abstraction need to
     implement only two functions, :meth:`samples_parameters` which generates all the parameters
     needed for extracting the samples, and :meth:`sample` that, given some parameters as input,
@@ -128,7 +133,8 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     combination of prevalence values is indicated by `repeats`.
 
     :param data: a `LabelledCollection` from which the samples will be drawn
-    :param sample_size: integer, number of instances in each sample
+    :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from
+        qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
     :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the
        grid (default is 21)
     :param repeats: number of copies for each valid prevalence vector (default is 10)
@@ -138,10 +144,11 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
         to "labelled_collection" to get instead instances of LabelledCollection
     """
 
-    def __init__(self, data:LabelledCollection, sample_size, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'):
+    def __init__(self, data:LabelledCollection, sample_size=None, n_prevalences=21, repeats=10,
+                 smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'):
         super(APP, self).__init__(random_state)
         self.data = data
-        self.sample_size = sample_size
+        self.sample_size = qp._get_sample_size(sample_size)
         self.n_prevalences = n_prevalences
         self.repeats = repeats
         self.smooth_limits_epsilon = smooth_limits_epsilon
@@ -191,17 +198,18 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
     samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.
 
     :param data: a `LabelledCollection` from which the samples will be drawn
-    :param sample_size: integer, the number of instances in each sample
+    :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from
+        qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.
     :param repeats: the number of samples to generate. Default is 100.
     :param random_state: allows replicating samples across runs (default None)
     :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or
         to "labelled_collection" to get instead instances of LabelledCollection
     """
 
-    def __init__(self, data:LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'):
+    def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev'):
         super(NPP, self).__init__(random_state)
         self.data = data
-        self.sample_size = sample_size
+        self.sample_size = qp._get_sample_size(sample_size)
         self.repeats = repeats
         self.random_state = random_state
         self.collator = OnLabelledCollectionProtocol.get_collator(return_type)
@@ -230,17 +238,19 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol)
     combinations of the grid values of APP makes this endeavour intractable.
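    For illustration (an editorial sketch, not part of the patch), drawing one prevalence vector uniformly
    at random from the unit simplex in the Kraemer style amounts to taking the gaps between sorted uniform
    random points:

    >>> import numpy as np
    >>> def uniform_simplex_sample(n_classes):
    ...     # the gaps between sorted U(0,1) points partition [0,1] into n_classes parts
    ...     u = np.sort(np.random.rand(n_classes - 1))
    ...     return np.diff(np.concatenate(([0.], u, [1.])))
    >>> prev = uniform_simplex_sample(3)
    >>> bool(np.isclose(prev.sum(), 1.0) and np.all(prev >= 0))
    True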
:param data: a `LabelledCollection` from which the samples will be drawn - :param sample_size: integer, the number of instances in each sample + :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. :param repeats: the number of samples to generate. Default is 100. :param random_state: allows replicating samples across runs (default None) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data: LabelledCollection, sample_size, repeats=100, random_state=None, return_type='sample_prev'): + def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=None, + return_type='sample_prev'): super(USimplexPP, self).__init__(random_state) self.data = data - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats self.random_state = random_state self.collator = OnLabelledCollectionProtocol.get_collator(return_type) @@ -259,32 +269,7 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) return self.repeats -# class LoadSamplesFromDirectory(AbstractProtocol): -# -# def __init__(self, folder_path, loader_fn, classes=None, **loader_kwargs): -# assert exists(folder_path), f'folder {folder_path} does not exist' -# assert callable(loader_fn), f'the passed load_fn does not seem to be callable' -# self.folder_path = folder_path -# self.loader_fn = loader_fn -# self.classes = classes -# self.loader_kwargs = loader_kwargs -# self._list_files = None -# -# def __call__(self): -# for file in self.list_files: -# yield LabelledCollection.load(file, loader_func=self.loader_fn, classes=self.classes, **self.loader_kwargs) -# -# @property -# def list_files(self): -# if self._list_files is None: -# self._list_files = sorted(glob(self.folder_path, '*')) -# return self._list_files -# -# def total(self): -# return len(self.list_files) - - -class CovariateShiftPP(AbstractStochasticSeededProtocol): +class DomainMixer(AbstractStochasticSeededProtocol): """ Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. 
@@ -311,10 +296,10 @@ class CovariateShiftPP(AbstractStochasticSeededProtocol): mixture_points=11, random_state=None, return_type='sample_prev'): - super(CovariateShiftPP, self).__init__(random_state) + super(DomainMixer, self).__init__(random_state) self.A = domainA self.B = domainB - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats if prevalence is None: self.prevalence = domainA.prevalence() diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py index bcf721c..f13907c 100644 --- a/quapy/tests/test_methods.py +++ b/quapy/tests/test_methods.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression from sklearn.svm import LinearSVC import quapy as qp +from quapy.method.base import BinaryQuantifier from quapy.data import Dataset, LabelledCollection from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS from quapy.method.aggregative import ACC, PACC, HDy @@ -21,7 +22,7 @@ learners = [LogisticRegression, LinearSVC] def test_aggregative_methods(dataset: Dataset, aggregative_method, learner): model = aggregative_method(learner()) - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {type(model)} on non-binary dataset {dataset}') return @@ -45,7 +46,7 @@ def test_elm_methods(dataset: Dataset, elm_method): print('Missing SVMperf binary program, skipping test') return - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -64,7 +65,7 @@ def test_elm_methods(dataset: Dataset, elm_method): def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): model = non_aggregative_method() - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -85,7 +86,7 @@ def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): def test_ensemble_method(base_method, learner, dataset: Dataset, policy): qp.environ['SAMPLE_SIZE'] = len(dataset.training) model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1) - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -120,7 +121,7 @@ def test_quanet_method(): from quapy.method.meta import QuaNet model = QuaNet(learner, sample_size=len(dataset.training), device='cuda') - if model.binary and not dataset.binary: + if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') return @@ -138,7 +139,7 @@ def models_to_test_for_str_label_names(): models = list() learner = LogisticRegression for method in AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS): - models.append(method(learner())) + models.append(method(learner(random_state=0))) for method in NON_AGGREGATIVE_METHODS: models.append(method()) return models @@ -156,6 +157,7 @@ def test_str_label_names(model): dataset.test.sampling(1000, *dataset.test.prevalence())) qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) + numpy.random.seed(0) model.fit(dataset.training) int_estim_prevalences = 
model.quantify(dataset.test.instances) @@ -168,7 +170,8 @@ def test_str_label_names(model): ['one' if label == 1 else 'zero' for label in dataset.training.labels]), LabelledCollection(dataset.test.instances, ['one' if label == 1 else 'zero' for label in dataset.test.labels])) - + assert all(dataset_str.training.classes_ == dataset_str.test.classes_), 'wrong indexation' + numpy.random.seed(0) model.fit(dataset_str.training) str_estim_prevalences = model.quantify(dataset_str.test.instances) diff --git a/quapy/tests/test_modsel.py b/quapy/tests/test_modsel.py index d54dcbe..180f680 100644 --- a/quapy/tests/test_modsel.py +++ b/quapy/tests/test_modsel.py @@ -5,9 +5,9 @@ from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC import quapy as qp -from method.aggregative import PACC -from model_selection import GridSearchQ -from protocol import APP +from quapy.method.aggregative import PACC +from quapy.model_selection import GridSearchQ +from quapy.protocol import APP import time @@ -20,7 +20,7 @@ class ModselTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) training, validation = data.training.split_stratified(0.7, random_state=1) - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, verbose=True @@ -28,8 +28,8 @@ class ModselTestCase(unittest.TestCase): print('best params', q.best_params_) print('best score', q.best_score_) - self.assertEqual(q.best_params_['C'], 10.0) - self.assertEqual(q.best_model().get_params()['C'], 10.0) + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0) def test_modsel_parallel(self): @@ -39,7 +39,7 @@ class ModselTestCase(unittest.TestCase): training, validation = data.training.split_stratified(0.7, random_state=1) # test = data.test - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=-1, n_jobs=-1, verbose=True @@ -47,8 +47,8 @@ class ModselTestCase(unittest.TestCase): print('best params', q.best_params_) print('best score', q.best_score_) - self.assertEqual(q.best_params_['C'], 10.0) - self.assertEqual(q.best_model().get_params()['C'], 10.0) + self.assertEqual(q.best_params_['classifier__C'], 10.0) + self.assertEqual(q.best_model().get_params()['classifier__C'], 10.0) def test_modsel_parallel_speedup(self): class SlowLR(LogisticRegression): @@ -61,7 +61,7 @@ class ModselTestCase(unittest.TestCase): data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=10) training, validation = data.training.split_stratified(0.7, random_state=1) - param_grid = {'C': np.logspace(-3, 3, 7)} + param_grid = {'classifier__C': np.logspace(-3, 3, 7)} app = APP(validation, sample_size=100, random_state=1) tinit = time.time() @@ -95,7 +95,7 @@ class ModselTestCase(unittest.TestCase): training, validation = data.training.split_stratified(0.7, random_state=1) # test = data.test - param_grid = {'C': np.logspace(-3,3,7)} + param_grid = {'classifier__C': np.logspace(-3,3,7)} app = APP(validation, sample_size=100, random_state=1) q = GridSearchQ( q, param_grid, protocol=app, error='mae', refit=True, timeout=3, n_jobs=-1, verbose=True diff --git a/quapy/tests/test_protocols.py 
b/quapy/tests/test_protocols.py
index dea3290..1510fee 100644
--- a/quapy/tests/test_protocols.py
+++ b/quapy/tests/test_protocols.py
@@ -1,7 +1,7 @@
 import unittest
 import numpy as np
 from quapy.data import LabelledCollection
-from quapy.protocol import APP, NPP, USimplexPP, CovariateShiftPP, AbstractStochasticSeededProtocol
+from quapy.protocol import APP, NPP, USimplexPP, DomainMixer, AbstractStochasticSeededProtocol
 
 
 def mock_labelled_collection(prefix=''):
@@ -94,7 +94,7 @@ class TestProtocols(unittest.TestCase):
     def test_covariate_shift_replicate(self):
         dataA = mock_labelled_collection('domA')
         dataB = mock_labelled_collection('domB')
-        p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11, random_state=1)
+        p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11, random_state=1)
 
         samples1 = samples_to_str(p)
         samples2 = samples_to_str(p)
@@ -104,7 +104,7 @@ class TestProtocols(unittest.TestCase):
     def test_covariate_shift_not_replicate(self):
         dataA = mock_labelled_collection('domA')
         dataB = mock_labelled_collection('domB')
-        p = CovariateShiftPP(dataA, dataB, sample_size=10, mixture_points=11)
+        p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11)
 
         samples1 = samples_to_str(p)
         samples2 = samples_to_str(p)
diff --git a/quapy/util.py b/quapy/util.py
index 50a640d..6f8543d 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -22,7 +22,7 @@ def _get_parallel_slices(n_tasks, n_jobs):
 
 def map_parallel(func, args, n_jobs):
     """
-    Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then
+    Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and `n_jobs`=2, then
     func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function
     that already works with a list of arguments.
 
@@ -128,6 +128,7 @@ def create_if_not_exist(path):
 def get_quapy_home():
     """
     Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets.
+    This directory is `~/quapy_data`
 
     :return: a string representing the path
     """
@@ -162,7 +163,7 @@ def save_text_file(path, text):
 
 def pickled_resource(pickle_path:str, generation_func:callable, *args):
     """
-    Allows for fast reuse of resources that are generated only once by calling generation_func(*args). The next times
+    Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times
     this function is invoked, it loads the pickled resource. Example:
 
     >>> def some_array(n):  # a mock resource created with one parameter (`n`)
@@ -191,10 +192,6 @@ class EarlyStop:
     """
     A class implementing the early-stopping condition typically used for training neural networks.
 
-    :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtaind in a
-        held-out validation split) can be found to be worse than the best one obtained so far, before flagging the
-        stopping condition. An instance of this class is `callable`, and is to be used as follows:
-
     >>> earlystop = EarlyStop(patience=2, lower_is_better=True)
     >>> earlystop(0.9, epoch=0)
     >>> earlystop(0.7, epoch=1)
@@ -206,14 +203,14 @@ class EarlyStop:
     >>> earlystop.best_epoch  # is 1
     >>> earlystop.best_score  # is 0.7
 
-
+    :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a
+        held-out validation split) can be found to be worse than the best one obtained so far, before flagging the
+        stopping condition.
An instance of this class is `callable`, and is to be used as illustrated in the example above.
     :param lower_is_better: if True (default) the metric is to be minimized.
-
     :ivar best_score: keeps track of the best value seen so far
     :ivar best_epoch: keeps track of the epoch in which the best score was set
     :ivar STOP: flag (boolean) indicating the stopping condition
     :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call
-
     """
 
     def __init__(self, patience, lower_is_better=True):
@@ -243,4 +240,5 @@ class EarlyStop:
         else:
             self.patience -= 1
             if self.patience <= 0:
-                self.STOP = True
\ No newline at end of file
+                self.STOP = True
+

From e28abfc362573aeedeebea300874c6b904cafc98 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Thu, 9 Feb 2023 19:39:16 +0100
Subject: [PATCH 49/59] more examples, one-vs-all fixed

---
 examples/one_vs_all_example.py      |  53 +++++++++++++++
 quapy/CHANGE_LOG.txt                |   1 -
 quapy/classification/calibration.py |   2 +-
 quapy/data/base.py                  | 100 ++++++++++++++++------------
 quapy/data/datasets.py              |   1 +
 quapy/method/aggregative.py         |  51 +++-----------
 quapy/method/base.py                |  71 ++++++++++++--------
 quapy/tests/test_protocols.py       |   2 +-
 quapy/tests/test_replicability.py   |  52 ++++++++++++++-
 quapy/util.py                       |  12 ++--
 10 files changed, 224 insertions(+), 121 deletions(-)
 create mode 100644 examples/one_vs_all_example.py

diff --git a/examples/one_vs_all_example.py b/examples/one_vs_all_example.py
new file mode 100644
index 0000000..7488199
--- /dev/null
+++ b/examples/one_vs_all_example.py
@@ -0,0 +1,53 @@
+import quapy as qp
+from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric
+from quapy.method.base import getOneVsAll
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import USimplexPP
+from sklearn.linear_model import LogisticRegression
+import numpy as np
+
+"""
+In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral,
+and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes.
+"""
+
+qp.environ['SAMPLE_SIZE'] = 100
+
+"""
+Any binary quantifier can be turned into a single-label quantifier by means of the getOneVsAll function.
+This function returns an instance of a OneVsAll quantifier: the subclass OneVsAllAggregative when the quantifier
+is an instance of AggregativeQuantifier, and the subclass OneVsAllGeneric otherwise. Although OneVsAllGeneric
+works in all cases, using OneVsAllAggregative has some additional advantages (namely, all the advantages that
+AggregativeQuantifiers enjoy, i.e., faster predictions during evaluation).
+"""
+quantifier = getOneVsAll(MS2(LogisticRegression()), parallel_backend="loky")
+print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
+
+# load a ternary dataset
+train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
+
+"""
+model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the
+artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors
+from the unit simplex. The priors are sampled using the Kraemer algorithm. Note that this is in contrast to the
+standard APP protocol, which instead explores a prefixed grid of prevalence values.
+""" +param_grid = { + 'binary_quantifier__classifier__C': np.logspace(-2,2,5), # classifier-dependent hyperparameter + 'binary_quantifier__classifier__class_weight': ['balanced', None] # classifier-dependent hyperparameter +} +print('starting model selection') +gs = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), n_jobs=-1, verbose=True, refit=False) +quantifier = gs.fit(train_modsel).best_model() + +print('training on the whole training set') +train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test +quantifier.fit(train) + +# evaluation +mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') + +print(f'MAE = {mae:.4f}') + + diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 3fb21f6..98c939f 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -83,7 +83,6 @@ Things to fix: - update unit tests - update Wikis... - Resolve the OneVsAll thing (it is in base.py and in aggregative.py) -- Add a proper log? - improve plots - documentation of protocols is incomplete diff --git a/quapy/classification/calibration.py b/quapy/classification/calibration.py index f35bb97..a3f1543 100644 --- a/quapy/classification/calibration.py +++ b/quapy/classification/calibration.py @@ -65,7 +65,7 @@ class RecalibratedProbabilisticClassifierBase(BaseEstimator, RecalibratedProbabi """ Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all training instances via cross-validation, and then retrains the classifier on all training instances. - The posterior probabilities thus generated are used for calibrating the outpus of the classifier. + The posterior probabilities thus generated are used for calibrating the outputs of the classifier. :param X: array-like of shape `(n_samples, n_features)` with the data instances :param y: array-like of shape `(n_samples,)` with the class labels diff --git a/quapy/data/base.py b/quapy/data/base.py index 62f871d..7093821 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -6,21 +6,22 @@ from scipy.sparse import vstack from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold from numpy.random import RandomState from quapy.functional import strprev +from quapy.util import temp_seed class LabelledCollection: """ - A LabelledCollection is a set of objects each with a label associated to it. This class implements many sampling - routines. - + A LabelledCollection is a set of objects each with a label attached to each of them. + This class implements several sampling routines and other utilities. + :param instances: array-like (np.ndarray, list, or csr_matrix are supported) :param labels: array-like with the same length of instances - :param classes_: optional, list of classes from which labels are taken. If not specified, the classes are inferred + :param classes: optional, list of classes from which labels are taken. If not specified, the classes are inferred from the labels. 
The classes must be indicated in cases in which some of the labels might have no examples (i.e., a prevalence of 0) """ - def __init__(self, instances, labels, classes_=None): + def __init__(self, instances, labels, classes=None): if issparse(instances): self.instances = instances elif isinstance(instances, list) and len(instances) > 0 and isinstance(instances[0], str): @@ -30,14 +31,14 @@ class LabelledCollection: self.instances = np.asarray(instances) self.labels = np.asarray(labels) n_docs = len(self) - if classes_ is None: + if classes is None: self.classes_ = np.unique(self.labels) self.classes_.sort() else: - self.classes_ = np.unique(np.asarray(classes_)) + self.classes_ = np.unique(np.asarray(classes)) self.classes_.sort() - if len(set(self.labels).difference(set(classes_))) > 0: - raise ValueError(f'labels ({set(self.labels)}) contain values not included in classes_ ({set(classes_)})') + if len(set(self.labels).difference(set(classes))) > 0: + raise ValueError(f'labels ({set(self.labels)}) contain values not included in classes_ ({set(classes)})') self.index = {class_: np.arange(n_docs)[self.labels == class_] for class_ in self.classes_} @classmethod @@ -101,7 +102,7 @@ class LabelledCollection: """ return self.n_classes == 2 - def sampling_index(self, size, *prevs, shuffle=True): + def sampling_index(self, size, *prevs, shuffle=True, random_state=None): """ Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the prevalence values are not specified, then returns the index of a uniform sampling. @@ -113,10 +114,11 @@ class LabelledCollection: it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in `self.classes_` can be specified, while the other class takes prevalence value `1-p` :param shuffle: if set to True (default), shuffles the index before returning it + :param random_state: seed for reproducing sampling :return: a np.ndarray of shape `(size)` with the indexes """ if len(prevs) == 0: # no prevalence was indicated; returns an index for uniform sampling - return self.uniform_sampling_index(size) + return self.uniform_sampling_index(size, random_state=random_state) if len(prevs) == self.n_classes - 1: prevs = prevs + (1 - sum(prevs),) assert len(prevs) == self.n_classes, 'unexpected number of prevalences' @@ -129,22 +131,23 @@ class LabelledCollection: # (This aims at avoiding the remainder to be placed in a class for which the prevalence requested is 0.) 
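        # Worked illustration (editorial note, not part of the original code): with size=10 and
        # prevs=(0.33, 0.33, 0.34), the int() round-down below requests (3, 3, 3) instances, leaving
        # a remainder of 1; that remaining instance is assigned to a class drawn with probability
        # prevs, so a class whose requested prevalence is 0 can never receive it.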
n_requests = {class_: int(size * prevs[i]) for i, class_ in enumerate(self.classes_)} remainder = size - sum(n_requests.values()) - for rand_class in np.random.choice(self.classes_, size=remainder, p=prevs): - n_requests[rand_class] += 1 + with temp_seed(random_state): + for rand_class in np.random.choice(self.classes_, size=remainder, p=prevs): + n_requests[rand_class] += 1 - indexes_sample = [] - for class_, n_requested in n_requests.items(): - n_candidates = len(self.index[class_]) - index_sample = self.index[class_][ - np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates)) - ] if n_requested > 0 else [] + indexes_sample = [] + for class_, n_requested in n_requests.items(): + n_candidates = len(self.index[class_]) + index_sample = self.index[class_][ + np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates)) + ] if n_requested > 0 else [] - indexes_sample.append(index_sample) + indexes_sample.append(index_sample) - indexes_sample = np.concatenate(indexes_sample).astype(int) + indexes_sample = np.concatenate(indexes_sample).astype(int) - if shuffle: - indexes_sample = np.random.permutation(indexes_sample) + if shuffle: + indexes_sample = np.random.permutation(indexes_sample) return indexes_sample @@ -164,7 +167,7 @@ class LabelledCollection: ng = np.random return ng.choice(len(self), size, replace=size > len(self)) - def sampling(self, size, *prevs, shuffle=True): + def sampling(self, size, *prevs, shuffle=True, random_state=None): """ Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than @@ -175,10 +178,11 @@ class LabelledCollection: it is constrained. 
E.g., for binary collections, only the prevalence `p` for the first class (as listed in
            `self.classes_` can be specified, while the other class takes prevalence value `1-p`
        :param shuffle: if set to True (default), shuffles the index before returning it
+        :param random_state: seed for reproducing sampling
        :return: an instance of :class:`LabelledCollection` with length == `size` and prevalence close to `prevs` (or
            prevalence == `prevs` if the exact prevalence values can be met as proportions of instances)
        """
-        prev_index = self.sampling_index(size, *prevs, shuffle=shuffle)
+        prev_index = self.sampling_index(size, *prevs, shuffle=shuffle, random_state=random_state)
        return self.sampling_from_index(prev_index)

    def uniform_sampling(self, size, random_state=None):
@@ -204,7 +208,7 @@ class LabelledCollection:
        """
        documents = self.instances[index]
        labels = self.labels[index]
-        return LabelledCollection(documents, labels, classes_=self.classes_)
+        return LabelledCollection(documents, labels, classes=self.classes_)

    def split_stratified(self, train_prop=0.6, random_state=None):
        """
@@ -221,11 +225,10 @@ class LabelledCollection:
        tr_docs, te_docs, tr_labels, te_labels = train_test_split(
            self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state
        )
-        training = LabelledCollection(tr_docs, tr_labels, classes_=self.classes_)
-        test = LabelledCollection(te_docs, te_labels, classes_=self.classes_)
+        training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
+        test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
        return training, test

-
    def split_random(self, train_prop=0.6, random_state=None):
        """
        Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired
@@ -261,20 +264,33 @@ class LabelledCollection:
        :return: a :class:`LabelledCollection` representing the union of both collections
        """
        if not all(np.sort(self.classes_)==np.sort(other.classes_)):
-            raise NotImplementedError('unsupported operation for collections on different classes')
+            raise NotImplementedError(f'unsupported operation for collections on different classes; '
+                                      f'expected {self.classes_}, found {other.classes_}')
+        return LabelledCollection.mix(self, other)

-        if other is None:
-            return self
-        elif issparse(self.instances) and issparse(other.instances):
-            join_instances = vstack([self.instances, other.instances])
-        elif isinstance(self.instances, list) and isinstance(other.instances, list):
-            join_instances = self.instances + other.instances
-        elif isinstance(self.instances, np.ndarray) and isinstance(other.instances, np.ndarray):
-            join_instances = np.concatenate([self.instances, other.instances])
+    @classmethod
+    def mix(cls, a:'LabelledCollection', b:'LabelledCollection'):
+        """
+        Returns a new :class:`LabelledCollection` as the union of the two given collections.
+
+        :param a: instance of :class:`LabelledCollection`
+        :param b: instance of :class:`LabelledCollection`
+        :return: a :class:`LabelledCollection` representing the union of both collections
+        """
+        if a is None: return b
+        if b is None: return a
+        elif issparse(a.instances) and issparse(b.instances):
+            join_instances = vstack([a.instances, b.instances])
+        elif isinstance(a.instances, list) and isinstance(b.instances, list):
+            join_instances = a.instances + b.instances
+        elif isinstance(a.instances, np.ndarray) and isinstance(b.instances, np.ndarray):
+            join_instances = np.concatenate([a.instances, b.instances])
         else:
             raise NotImplementedError('unsupported operation for collection types')
-        labels = np.concatenate([self.labels, other.labels])
-        return LabelledCollection(join_instances, labels, classes_=self.classes_)
+        labels = np.concatenate([a.labels, b.labels])
+        classes = np.unique(np.concatenate([a.classes_, b.classes_]))
+        return LabelledCollection(join_instances, labels, classes=classes)
+

     @property
     def Xy(self):
@@ -291,7 +307,7 @@ class LabelledCollection:
     def Xp(self):
         """
         Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from
-        a `LabelledCollection` object.
+        a :class:`LabelledCollection` object.
 
         :return: a tuple `(instances, prevalence)` from this collection
         """
@@ -357,7 +373,7 @@ class LabelledCollection:
             f'#classes={stats_["classes"]}, prevs={stats_["prevs"]}')
         return stats_
 
-    def kFCV(self, nfolds=5, nrepeats=1, random_state=0):
+    def kFCV(self, nfolds=5, nrepeats=1, random_state=None):
         """
         Generator of stratified folds to be used in k-fold cross validation.
 
diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py
index 241cd04..5c5eb99 100644
--- a/quapy/data/datasets.py
+++ b/quapy/data/datasets.py
@@ -6,6 +6,7 @@ import os
 import zipfile
 from os.path import join
 import pandas as pd
+import scipy
 
 from quapy.data.base import Dataset, LabelledCollection
 from quapy.data.preprocessing import text2tfidf, reduce_columns
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index a9a93cb..e07f665 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -14,7 +14,7 @@ import quapy.functional as F
 from classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration
 from quapy.classification.svmperf import SVMperf
 from quapy.data import LabelledCollection
-from quapy.method.base import BaseQuantifier, BinaryQuantifier
+from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric
 
 
 # Abstract classes
@@ -1246,7 +1246,7 @@ MedianSweep = MS
 MedianSweep2 = MS2
 
 
-class OneVsAll(AggregativeQuantifier):
+class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier):
     """
     Allows any binary quantifier to perform quantification on single-label datasets.
     The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the
     class prevalence values sum up to 1.
 
     :param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass model in a
         one-vs-all manner
     :param n_jobs: number of parallel workers
+    :param parallel_backend: the parallel backend for joblib (default "loky"); this is helpful for some quantifiers
+        (e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit will
""" - def __init__(self, binary_quantifier, n_jobs=None): - assert isinstance(self.binary_quantifier, BaseQuantifier), \ + def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'): + assert isinstance(binary_quantifier, BaseQuantifier), \ f'{self.binary_quantifier} does not seem to be a Quantifier' - assert isinstance(self.binary_quantifier, AggregativeQuantifier), \ + assert isinstance(binary_quantifier, AggregativeQuantifier), \ f'{self.binary_quantifier} does not seem to be of type Aggregative' self.binary_quantifier = binary_quantifier self.n_jobs = qp._get_njobs(n_jobs) - - def fit(self, data: LabelledCollection, fit_classifier=True): - assert not data.binary, \ - f'{self.__class__.__name__} expect non-binary data' - assert fit_classifier == True, \ - 'fit_classifier must be True' - - self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_} - self.__parallel(self._delayed_binary_fit, data) - return self + self.parallel_backend = parallel_backend def classify(self, instances): """ @@ -1292,35 +1286,16 @@ class OneVsAll(AggregativeQuantifier): :return: `np.ndarray` """ - classif_predictions = self.__parallel(self._delayed_binary_classification, instances) + classif_predictions = self._parallel(self._delayed_binary_classification, instances) if isinstance(self.binary_quantifier, AggregativeProbabilisticQuantifier): return np.swapaxes(classif_predictions, 0, 1) else: return classif_predictions.T def aggregate(self, classif_predictions): - prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions) + prevalences = self._parallel(self._delayed_binary_aggregate, classif_predictions) return F.normalize_prevalence(prevalences) - def __parallel(self, func, *args, **kwargs): - return np.asarray( - # some quantifiers (in particular, ELM-based ones) cannot be run with multiprocess, since the temp dir they - # create during the fit will be removed and be no longer available for the predict... - Parallel(n_jobs=self.n_jobs, backend='threading')( - delayed(func)(c, *args, **kwargs) for c in self.classes_ - ) - ) - - @property - def classes_(self): - return sorted(self.dict_binary_quantifiers.keys()) - - def set_params(self, **parameters): - self.binary_quantifier.set_params(**parameters) - - def get_params(self, deep=True): - return self.binary_quantifier.get_params() - def _delayed_binary_classification(self, c, X): return self.dict_binary_quantifiers[c].classify(X) @@ -1328,7 +1303,3 @@ class OneVsAll(AggregativeQuantifier): # the estimation for the positive class prevalence return self.dict_binary_quantifiers[c].aggregate(classif_predictions[:, c])[1] - def _delayed_binary_fit(self, c, data): - bindata = LabelledCollection(data.instances, data.labels == c, classes_=[False, True]) - self.dict_binary_quantifiers[c].fit(bindata) - diff --git a/quapy/method/base.py b/quapy/method/base.py index a80f7b7..1803085 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -1,10 +1,12 @@ from abc import ABCMeta, abstractmethod from copy import deepcopy +from joblib import Parallel, delayed from sklearn.base import BaseEstimator import quapy as qp from quapy.data import LabelledCollection +import numpy as np # Base Quantifier abstract class @@ -48,50 +50,61 @@ class BinaryQuantifier(BaseQuantifier): f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.' 
-class OneVsAllGeneric:
+class OneVsAll:
+    pass
+
+
+def getOneVsAll(binary_quantifier, n_jobs=None, parallel_backend='loky'):
+    assert isinstance(binary_quantifier, BaseQuantifier), \
+        f'{binary_quantifier} does not seem to be a Quantifier'
+    if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
+        return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs, parallel_backend)
+    else:
+        return OneVsAllGeneric(binary_quantifier, n_jobs, parallel_backend)
+
+
+class OneVsAllGeneric(OneVsAll, BaseQuantifier):
     """
     Allows any binary quantifier to perform quantification on single-label datasets. The method maintains
     one binary quantifier for each class, and then l1-normalizes the outputs so that the
     class prevalence values sum up to 1.
     """
 
-    def __init__(self, binary_quantifier, n_jobs=None):
+    def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'):
         assert isinstance(binary_quantifier, BaseQuantifier), \
             f'{binary_quantifier} does not seem to be a Quantifier'
+        if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
+            print('[warning] the quantifier seems to be an instance of qp.method.aggregative.AggregativeQuantifier; '
+                  f'you might prefer instantiating {qp.method.aggregative.OneVsAllAggregative.__name__}')
         self.binary_quantifier = binary_quantifier
         self.n_jobs = qp._get_njobs(n_jobs)
+        self.parallel_backend = parallel_backend
 
-    def fit(self, data: LabelledCollection, **kwargs):
-        assert not data.binary, \
-            f'{self.__class__.__name__} expect non-binary data'
-        self.class_quatifier = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
-        Parallel(n_jobs=self.n_jobs, backend='threading')(
-            delayed(self._delayed_binary_fit)(c, self.class_quatifier, data, **kwargs) for c in data.classes_
-        )
+    def fit(self, data: LabelledCollection, fit_classifier=True):
+        assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
+        assert fit_classifier == True, 'fit_classifier must be True'
+
+        self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
+        self._parallel(self._delayed_binary_fit, data)
         return self
 
-    def quantify(self, X, *args):
-        prevalences = np.asarray(
-            Parallel(n_jobs=self.n_jobs, backend='threading')(
-                delayed(self._delayed_binary_predict)(c, self.class_quatifier, X) for c in self.classes
-            )
-        )
-        return F.normalize_prevalence(prevalences)
+    def _parallel(self, func, *args, **kwargs):
+        return np.asarray(
+            Parallel(n_jobs=self.n_jobs, backend=self.parallel_backend)(
+                delayed(func)(c, *args, **kwargs) for c in self.classes_
+            )
+        )
+
+    def quantify(self, instances):
+        prevalences = self._parallel(self._delayed_binary_predict, instances)
+        return qp.functional.normalize_prevalence(prevalences)
 
     @property
-    def classes(self):
-        return sorted(self.class_quatifier.keys())
-
-    def set_params(self, **parameters):
-        self.binary_quantifier.set_params(**parameters)
-
-    def get_params(self, deep=True):
-        return self.binary_quantifier.get_params()
-
-    def _delayed_binary_predict(self, c, quantifiers, X):
-        return quantifiers[c].quantify(X)[:, 1]  # the mean is the estimation for the positive class prevalence
-
-    def _delayed_binary_fit(self, c, quantifiers, data, **kwargs):
-        bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
-        quantifiers[c].fit(bindata, **kwargs)
+    def classes_(self):
+        return sorted(self.dict_binary_quantifiers.keys())
 
+    def _delayed_binary_predict(self, c, X):
+        return self.dict_binary_quantifiers[c].quantify(X)[1]
 
+    def _delayed_binary_fit(self, c, data):
_delayed_binary_fit(self, c, data): + bindata = LabelledCollection(data.instances, data.labels == c, classes=[False, True]) + self.dict_binary_quantifiers[c].fit(bindata) diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index 1510fee..e5d446e 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -7,7 +7,7 @@ from quapy.protocol import APP, NPP, USimplexPP, DomainMixer, AbstractStochastic def mock_labelled_collection(prefix=''): y = [0] * 250 + [1] * 250 + [2] * 250 + [3] * 250 X = [prefix + str(i) + '-' + str(yi) for i, yi in enumerate(y)] - return LabelledCollection(X, y, classes_=sorted(np.unique(y))) + return LabelledCollection(X, y, classes=sorted(np.unique(y))) def samples_to_str(protocol): diff --git a/quapy/tests/test_replicability.py b/quapy/tests/test_replicability.py index 329ac32..e89531a 100644 --- a/quapy/tests/test_replicability.py +++ b/quapy/tests/test_replicability.py @@ -1,13 +1,14 @@ import unittest import quapy as qp +from quapy.data import LabelledCollection from quapy.functional import strprev from sklearn.linear_model import LogisticRegression -from method.aggregative import PACC +from quapy.method.aggregative import PACC class MyTestCase(unittest.TestCase): - def test_replicability(self): + def test_prediction_replicability(self): dataset = qp.datasets.fetch_UCIDataset('yeast') @@ -25,6 +26,53 @@ class MyTestCase(unittest.TestCase): self.assertEqual(str_prev1, str_prev2) # add assertion here + def test_samping_replicability(self): + import numpy as np + + def equal_collections(c1, c2, value=True): + self.assertEqual(np.all(c1.X == c2.X), value) + self.assertEqual(np.all(c1.y == c2.y), value) + if value: + self.assertEqual(np.all(c1.classes_ == c2.classes_), value) + + X = list(map(str, range(100))) + y = np.random.randint(0, 2, 100) + data = LabelledCollection(instances=X, labels=y) + + sample1 = data.sampling(50) + sample2 = data.sampling(50) + equal_collections(sample1, sample2, False) + + sample1 = data.sampling(50, random_state=0) + sample2 = data.sampling(50, random_state=0) + equal_collections(sample1, sample2, True) + + sample1 = data.sampling(50, *[0.7, 0.3], random_state=0) + sample2 = data.sampling(50, *[0.7, 0.3], random_state=0) + equal_collections(sample1, sample2, True) + + with qp.util.temp_seed(0): + sample1 = data.sampling(50, *[0.7, 0.3]) + with qp.util.temp_seed(0): + sample2 = data.sampling(50, *[0.7, 0.3]) + equal_collections(sample1, sample2, True) + + sample1 = data.sampling(50, *[0.7, 0.3], random_state=0) + sample2 = data.sampling(50, *[0.7, 0.3], random_state=0) + equal_collections(sample1, sample2, True) + + sample1_tr, sample1_te = data.split_stratified(train_prop=0.7, random_state=0) + sample2_tr, sample2_te = data.split_stratified(train_prop=0.7, random_state=0) + equal_collections(sample1_tr, sample2_tr, True) + equal_collections(sample1_te, sample2_te, True) + + with qp.util.temp_seed(0): + sample1_tr, sample1_te = data.split_stratified(train_prop=0.7) + with qp.util.temp_seed(0): + sample2_tr, sample2_te = data.split_stratified(train_prop=0.7) + equal_collections(sample1_tr, sample2_tr, True) + equal_collections(sample1_te, sample2_te, True) + if __name__ == '__main__': unittest.main() diff --git a/quapy/util.py b/quapy/util.py index 6f8543d..298f02a 100644 --- a/quapy/util.py +++ b/quapy/util.py @@ -73,14 +73,16 @@ def temp_seed(random_state): :param random_state: the seed to set within the "with" context """ - state = np.random.get_state() - #save the seed just in case is needed 
(for instance for setting the seed to child processes) - qp.environ['_R_SEED'] = random_state - np.random.seed(random_state) + if random_state is not None: + state = np.random.get_state() + #save the seed just in case is needed (for instance for setting the seed to child processes) + qp.environ['_R_SEED'] = random_state + np.random.seed(random_state) try: yield finally: - np.random.set_state(state) + if random_state is not None: + np.random.set_state(state) def download_file(url, archive_filename): From 9584e5152eabf8c78f147e1c8461bbce81717d8e Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Thu, 9 Feb 2023 19:39:39 +0100 Subject: [PATCH 50/59] more examples, one-vs-all fixed --- quapy/CHANGE_LOG.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 98c939f..cb10202 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -78,8 +78,6 @@ Change Log 0.1.7 Things to fix: -------------- -- OneVsAll is duplicated (in aggregative and in general), and is not well documented. It is not working either. - Check method def __parallel(self, func, *args, **kwargs) in aggregative.OneVsAll - update unit tests - update Wikis... - Resolve the OneVsAll thing (it is in base.py and in aggregative.py) From 33a21db52cbefac545deea82a3ccd7b47f1a733f Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Thu, 9 Feb 2023 19:43:24 +0100 Subject: [PATCH 51/59] more examples, one-vs-all fixed --- quapy/CHANGE_LOG.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index cb10202..4b14d14 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -76,11 +76,15 @@ Change Log 0.1.7 so that probabilistic classifiers return posterior probabilities, while non-probabilistic quantifiers return crisp decisions. +- OneVsAll fixed. There are now two classes: a generic one OneVsAllGeneric that works with any quantifier (e.g., + any instance of BaseQuantifier), and a subclass of it called OneVsAllAggregative which implements the + classify / aggregate interface. Both are instances of OneVsAll. There is a method getOneVsAll that returns the + best instance based on the type of quantifier. + Things to fix: -------------- - update unit tests - update Wikis... 
-- Resolve the OneVsAll thing (it is in base.py and in aggregative.py) - improve plots - documentation of protocols is incomplete From 952cf5e76706ad7d186cf75778000e25d74ff161 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Fri, 10 Feb 2023 19:02:17 +0100 Subject: [PATCH 52/59] fixing bugs in one-vs-all --- docs/build/html/genindex.html | 26 ++-- docs/build/html/objects.inv | Bin 2873 -> 2859 bytes docs/build/html/quapy.classification.html | 2 +- docs/build/html/quapy.data.html | 33 +++- docs/build/html/quapy.html | 26 ++-- docs/build/html/quapy.method.html | 141 ++++++------------ docs/build/html/searchindex.js | 2 +- examples/custom_quantifier.py | 8 +- examples/lequa2022_experiments.py | 26 +++- .../{one_vs_all_example.py => one_vs_all.py} | 7 +- quapy/CHANGE_LOG.txt | 2 - quapy/method/aggregative.py | 2 +- quapy/method/base.py | 11 +- quapy/model_selection.py | 3 +- quapy/protocol.py | 136 +++++++++++++++-- quapy/tests/test_protocols.py | 57 ++++++- 16 files changed, 308 insertions(+), 174 deletions(-) rename examples/{one_vs_all_example.py => one_vs_all.py} (90%) diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index 7e5281f..fc438e0 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -113,7 +113,7 @@
  • (quapy.method.aggregative.HDy method)
  • -
  • (quapy.method.aggregative.OneVsAll method) +
  • (quapy.method.aggregative.OneVsAllAggregative method)
  • (quapy.method.aggregative.PACC method)
  • @@ -177,8 +177,6 @@
  • CC (class in quapy.method.aggregative)
  • check_prevalence_vector() (in module quapy.functional) -
  • -
  • classes (quapy.method.base.OneVsAllGeneric property)
  • classes_ (quapy.classification.calibration.RecalibratedProbabilisticClassifierBase property) @@ -187,7 +185,7 @@
  • (quapy.method.aggregative.AggregativeQuantifier property)
  • -
  • (quapy.method.aggregative.OneVsAll property) +
  • (quapy.method.base.OneVsAllGeneric property)
  • (quapy.method.neural.QuaNetTrainer property)
  • @@ -203,7 +201,7 @@
  • (quapy.method.aggregative.ELM method)
  • -
  • (quapy.method.aggregative.OneVsAll method) +
  • (quapy.method.aggregative.OneVsAllAggregative method)
  • (quapy.method.aggregative.PACC method)
  • @@ -358,8 +356,6 @@
  • (quapy.method.aggregative.EMQ method)
  • (quapy.method.aggregative.HDy method) -
  • -
  • (quapy.method.aggregative.OneVsAll method)
  • (quapy.method.aggregative.PACC method)
  • @@ -426,10 +422,6 @@
  • (quapy.classification.neural.NeuralClassifierTrainer method)
  • (quapy.classification.neural.TextClassifierNet method) -
  • -
  • (quapy.method.aggregative.OneVsAll method) -
  • -
  • (quapy.method.base.OneVsAllGeneric method)
  • (quapy.method.meta.Ensemble method)
  • @@ -443,6 +435,8 @@
  • get_probability_distribution() (in module quapy.method.meta)
  • get_quapy_home() (in module quapy.util) +
  • +
  • getOneVsAll() (in module quapy.method.base)
  • getPteCondEstim() (quapy.method.aggregative.ACC class method) @@ -539,6 +533,8 @@
  • MedianSweep (in module quapy.method.aggregative)
  • MedianSweep2 (in module quapy.method.aggregative) +
  • +
  • mix() (quapy.data.base.LabelledCollection class method)
  • mkld() (in module quapy.error)
  • @@ -641,10 +637,12 @@
      +
    • OneVsAllAggregative (class in quapy.method.aggregative) +
    • OneVsAllGeneric (class in quapy.method.base)
    • OnLabelledCollectionProtocol (class in quapy.protocol) @@ -975,10 +973,6 @@
    • (quapy.classification.neural.NeuralClassifierTrainer method)
    • (quapy.classification.svmperf.SVMperf method) -
    • -
    • (quapy.method.aggregative.OneVsAll method) -
    • -
    • (quapy.method.base.OneVsAllGeneric method)
    • (quapy.method.meta.Ensemble method)
    • diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv index 8d6eecf181f1cdd77e45b8ddf90d4b282edd6c79..3c30cfbd5de325c17c09e61fe66b287eec1d512e 100644 GIT binary patch delta 2770 zcmV;@3N7`y7ONJJb${J<;y4n3@ADMwnZ1oqch1?Hxl0pfCTD=qA)W2H(h;@*ZEVXU z2?BF9ud%PUPqHQ11{(xeKcp`@O^oU*{gg^ovh4i1!o{{%Y=6#JvPwzsQ}0i8C0fcX z{i(cm`_JC(O?fp=u@H1lV=O7l4PxyAohVU^Q##{UB1T{OFMrd1TZ1m$`+P=0C-V5q z>y^mQr2dU06V7IMMpG&zjr)}h;UBB*3l`+#HI@h>kWyW6RuC??AE$jzrM5>Rmn`{E zwJ2WyD9w_iHBNto_JWgy#_|WMMujR9vI5s~Js}-W*;WzA^;WcK$ljlE{x)O_-Qmw< z!3mTLRom125P#G5vXTV>Yr!06;`+J*O^nWp!c2q9>cj{3q2xFhbH+1YRXJJl%0K!~ zmJ$~A$K#wx&xz4uMG1>n8ObFg*^DF!%@<)BUh~7*f13;AMDwb)SV~T3D@j6t*DS`f zRf_o*3HlfDk0>hqaQZR|Hh2~6dZSk>2tIr@IQ;n4>!mNN7bivXDotJ-92a1nU6nLqPUdTr7wTwgg+9Pa~ zQ1H1bS`gQ&`Sl$8u__pWcg&Nk@q^F&V-?q!jmKKTFImSruC;g_Yuey7d_t?7hMaIoW;6$fW%O0u0DotOC)dGefjZ9b;jwjq#L=)4=wHrWHq>e(4^dU3c&{+Dmq+ErBLnKs-Msp(LCHmHX zY=lL-fVp!G_89Ye=1fcpe0TTh?#_z>PJ@zn;l+^eRLU}ga$vk;ZrKsw>A(_Vb%&)g zTe}f%$w!aML|^K?Bhvk1G$-1IP^uhycLN}tGv4{cn=T#UnE*4B1nq>6RGG9yIF21eh<`!@jAPErUL*TkJ-*Sg%UIJfEzVNuww zB3AYA?o*p~h>S70>{K2Sw$-WgrzhOPPNe=w>@+n#f~ggZOH27`BlcAIdLUSTZ#=qU zZsJ}f`V9hRSm6kNplYq`=Aat#FMq5Mj5rG8;T(%A$TTtR9iQmvZIY(=V z{kxf7gQ9MF1j<_;u5K^(#q8%z~aAAe8P@{1EQ zw)zq;7Mv`!qxC05_x=8HD%ONtlHUDpN*1g@)s331FUOp{Ss}m?OeE>EJQ)Z{GhZC6 zhH6#Ro=%+)@Zweevbrz4z0Z@r+DbOx{OXR~e{LP6{#touQ^p<;9%k_OpF>x>QPVK? zZbYBGqZWd_>yazb!w;YLY=1UFt60OUx?YYml;8AscY*zgiUPGPEPo>`<7KrZI`)$Z z7pCWoWY4~Q*iL=gx}64U8h|`oG8m3LS_0BxTB1N9T#MrP_Q>d~Pmheg!aq;_(O@!t z96pUbr>G*0?!TkQm%(!YRG5JVkpBQ(JW|*lwkaTi#HDk943Pe%KYzEw3ZIVKK(Y{1 zIb=eNXbuX*F3I%3{`Ro-Fj9~XGnxwIIhsd%5`TDh4OqNK*NmoK!goH>VA43Bz7taT zcEO%>=ZSjC$*Z_e)BAQZ>exgBEsP*Qd4 z9H3{KS?`rHc+(Tmp?}!Z;e#(>v321+p$-IFdfQu4R zaC`WP1NL3ExAoyR_2LKTO+W*|Wzb9**PW-5rOzMxHLDJZe1BwrRpU(K00@sQPb1EC zlnyyw!vrBvvzP4*AO6FRhOb1UK^2M9U`PQ?%>Xz-E?T99OgkAaQ6963r^9DN_|`9IxE{0;ny@$ael$- zgn_H=>i*r`K!4w&q_G`sk39L>k}htedS(-rl`)Z{Qr>P>7zpRyfnd!#{jjq&>ipxt z-Ek-1BVmp3SOQPr)cH1jAVwc!@#?ixP_S1X1nl6bj2w4=WyHC98Y4{j)-A(CKjP}( z7+8+{4~!g8BE2QP%7*lfq;$zxGT9xc(VIRl8JUv~Cx7#CQemo(Sa8feZUHzuQ!O}WM#O?5JZe)WyMfHj#W8Dir`<0t3$$pY8RDHwIcQS zs5$m^Cfi$1H=?6iJ+KUdn-SfZV&D!!_U$j9&hjoZ2aI!m4P(VIp5-rq$OPPENhP#|aPdCoCen^Oy;OHC-QZ zuo>lV)AHQ8jXmyV?9K8>*3mi}oQ_7ilC=A}t_nv3%+oEnz2aim=;ARa>YfFiqnt@Z zHh)xfiaDCH&=q8#6nBzL*n6(txPj(0C2mr!DvQnxW#0N%v}9R#)fq1Ikb^y^orUZL zX#A$89VddVc-*~u0B)s>;vVWZ9SE8HoUpie`?*(N)1{!yVE>$1 zqtyOtUG7m$cjd7TN;a?8Wk=Q9yBo!-OMhjFukDAXs_$+_DpR?g?vl@@c>Ajoqc8S$ z7jGsIhD)_e))1_()m2-NynM)8mC^1}m8!3nrS$XVH~V50s!`(KB#o67N~YQ}+5YeE z|B7Dy9!1@#f84+MT7&tK+ImWnn$OHx0%G_+$$`o40jnMoUw} z-e}5?y#mK?ctOm6UgJp6@TRh!`44lnu=*8dN3h+q(Y@x2&RT+%-#rp-Y-se_4=h#U YLMPAd9xJbvLS=H@o2at?2hE+F&$#X`^pjj^OGH;8o$^rA#DPU)Oqh!}ktK7Y@KT@AYQ;PVLuy~vX< zFBc*|k@`21OgWq5IZdgMG#*wmgnz8IFIbR|*H|KmKuUGNSwXnmew_9NmD(POT(RUs z)uMRwqcls7HaPte+6zt+8p|K38WpNc$O>F4^n?sNWm`p{&|A@>5qo>W`Rj-+b%#HZ zB_~iWRBey*Lw~sKc_m8%)`B_C#N~Aba*W=J!nnbCb>aj2P;#7$1>>2os+_EO6(9Yb zrG!Pp$s{Mz(=j@%C}HtBBe_H*o0BA=`7%tyYkoNU?{Z2Csr$ZuDve!H2H~habP1e1E9{oap~{3Bj{!fp>m=8XhHN zL*t9LA;eU8MniBzI;JSlB-l?&m>KYlE|{92^YV}CK=DzE0#CKkGkKRgEt62O_5d3t z6k@K*7R04$e!0MYs0v2l4fEt;{NSE{sNxddc&H`(ly#`%Qj^!Argyx7PiURfkS@r0 zyt=~@)_=;Oyst%!uuz{;hLdvM^{KJgWYDdPKnZ$9i)Jt5n(EQ^JsM;IF;(#fE!KJ5 zY=8PUzEKch{@+IinuqtX;by0O@&>0gK`B!ki7qalfN2`6NnJi-L;93p?j8**rCx5qZ`ypO`8NN6-Kt&X1;T%=f_o%&) zSPJJF=!pqQBBVzZ`pi71Nh3TbX-bkIgP*Z^!_k%yeGgKR=z9@}vkk*>b{PXJX6xL+ zdJH5dCo*uH{K>%aaV-NbkPX1h26F|;%XPVL8wJiF3>))%XBJv;mLZxa>Q{REVD)Hx>v$F7l_5iU0A?^ zTX-l}-R30CiMAn>s({|#00`%d_dcqoOGkJvz|15;jqrggla>g_v4aS)C~Z-8@qgL2 zE{o4Lcr}M1ncjOXHLvb+=)cL 
zLBb3(9N~9V&6WKER7?Ju6@n2*VSk+bXDuX25PXx%&Z%JwO1vWRE7~p2(FSt=ex}!= zsJm6Uf)tGAQXpKw{Qpifmm1}eP~jBjZPeW|utBV|c9UY3sa!zT_Z}@sEsBogC+5`t zkfJ^-DJk(_f$2exVcj{f-d7Myse0+$(%)X0JCFngaY$2aFjY!@=&I!xM}IuF`Vudf zoGf*q^(REP!{OmltPMFQ-TZD^ELef6J1kpYj(PQFg@8aXm1M~Bw7q5z!)eYV4ZJrF(9Rrf z(;O6tW|8TE{q1h+VWc1(W;7MZb2Jb3B>wR18nAeet{Km~gl~ML!K86Mdn2Uq?SdWY z#uN3JlNWKDrXKC85n)WOp@qqzyExC;r-6C;M8i3Gs@fhWg5aipTM5%(dkND}n+ekZ zy9v|4+X+*@P7c<`0e^a?y%TVuT@BOY+@aXB(VZ`0?2}}JbeOSJAkVR!y^L?~ecJLe z{=Mu@cfiD(@YPTJ$*T*V|Mkg7dc}!Yu{3!sWRtCi=7ljgGuqPteR8{)LlTabBq!<> z5NLPj&3scM02d{s;P&t%PYd+f^4ACc)LSf^H%1Kvmq9aOTz_}tN|ruM?bob26Z%z+ z6^#QRqEly=?P;zZz0Yw^9E~`yf!l|J z_6ETck|IG`&+YU&%D5*N{mN-FBbdi4!;1d2&bDkcCqg1!z|aH>j=9Gz04F`wf^%X- zEGWXGHszWaYz4Jm5peFfT#+)_+Za^g>Iqvkr&z#`e#a#b1eYn~8mX;lLh-Ubh}%z& zvsaSOF@Hy54V{wV81w^&(?##zDbGaPiE$=b%=#Y~t_7mo>GZ5ywF|codoIC_&Rb1K zS|6+=PC}xzQz8i$mH^2lPHkl7>H*YPPFvmDvxoL1omJ1^*A-Tah8ZD=ewV3ZRgRG& z_?P19Ty>z@MdgFQNIh|D&V8N9_LkF+=xA1VEPsRKW=!8rIdBgl`}Q0P`&ZrCJSR5c zV&dZP6JLj)I5_-ce)y!Zy~mxRL{In6-<~Gu`H$(bof0Rk$|f8f`7AT7zSeMZ>{Ufh zc*s9t5^0XGCJ5$qeJaFel)uf&qxv@XxRbFr%L7?Q>uhm)8tGcn{`0yj9NlA{ZXxUy zXMej!7mqnnw>0Pi-PDQVrqb&tMn#aip?U%ZB m1gp4vAlll{7<8ZUs>Fp}Ja>DnyjBX8i|fHumHj{WR`Pg%;dhh( diff --git a/docs/build/html/quapy.classification.html b/docs/build/html/quapy.classification.html index 9f66350..8e2a6b9 100644 --- a/docs/build/html/quapy.classification.html +++ b/docs/build/html/quapy.classification.html @@ -168,7 +168,7 @@ training set afterwards. Default value is 5.

      fit_cv(X, y)

      Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all training instances via cross-validation, and then retrains the classifier on all training instances. -The posterior probabilities thus generated are used for calibrating the outpus of the classifier.

      +The posterior probabilities thus generated are used for calibrating the outputs of the classifier.

      Parameters:
        diff --git a/docs/build/html/quapy.data.html b/docs/build/html/quapy.data.html index 644725f..52a2d9c 100644 --- a/docs/build/html/quapy.data.html +++ b/docs/build/html/quapy.data.html @@ -222,10 +222,10 @@ the collection), prevs (the prevalence values for each class)

        -class quapy.data.base.LabelledCollection(instances, labels, classes_=None)
        +class quapy.data.base.LabelledCollection(instances, labels, classes=None)

        Bases: object

        -

        A LabelledCollection is a set of objects each with a label associated to it. This class implements many sampling -routines.

        +

        A LabelledCollection is a set of objects, each with a label attached to it.
+This class implements several sampling routines and other utilities.

        Parameters:
          @@ -252,7 +252,7 @@ from the labels. The classes must be indicated in cases in which some of the lab
          property Xp

          Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from -a LabelledCollection object.

          +a LabelledCollection object.

          Returns:

          a tuple (instances, prevalence) from this collection
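For instance, an evaluation routine can unpack a sample in a single step (a minimal sketch; the toy documents and labels are invented for illustration):

    from quapy.data.base import LabelledCollection

    sample = LabelledCollection(['neg doc', 'pos doc'], [0, 1])
    instances, prevalence = sample.Xp  # same as (sample.instances, sample.prevalence())
    print(prevalence)                  # prints [0.5 0.5]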

          @@ -299,7 +299,7 @@ as listed by self.classes_

          -kFCV(nfolds=5, nrepeats=1, random_state=0)
          +kFCV(nfolds=5, nrepeats=1, random_state=None)

          Generator of stratified folds to be used in k-fold cross validation.

          Parameters:
          @@ -338,6 +338,23 @@ these arguments are used to call loader_func(path, **loader_kwargs)
          +
          +
          +classmethod mix(a: LabelledCollection, b: LabelledCollection)
          +

          Returns a new LabelledCollection as the union of the two given collections, a and b.

          +
          +
          Parameters:
          +
          +
          +
          Returns:
          +

          a LabelledCollection representing the union of both collections
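A minimal usage sketch (the two toy collections are invented for illustration, and mix is assumed to simply join the instances and labels of both operands):

    from quapy.data.base import LabelledCollection

    a = LabelledCollection(['d1', 'd2'], [0, 1])
    b = LabelledCollection(['d3', 'd4'], [0, 1])
    ab = LabelledCollection.mix(a, b)
    print(len(ab), ab.prevalence())  # 4 [0.5 0.5]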

          +
          +
          +
          +
          property n_classes
          @@ -374,7 +391,7 @@ as listed by self.classes_

          -sampling(size, *prevs, shuffle=True)
          +sampling(size, *prevs, shuffle=True, random_state=None)

          Return a random sample (an instance of LabelledCollection) of desired size and desired prevalence values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than the actual prevalence of the class, or with replacement otherwise.

          @@ -386,6 +403,7 @@ the actual prevalence of the class, or with replacement otherwise.

        it is constrained. E.g., for binary collections, only the prevalence p for the first class (as listed in
self.classes_) can be specified, while the other class takes prevalence value 1-p

        • shuffle – if set to True (default), shuffles the index before returning it

        • +
        • random_state – seed for reproducing sampling

        Returns:
        @@ -412,7 +430,7 @@ index.

        -sampling_index(size, *prevs, shuffle=True)
        +sampling_index(size, *prevs, shuffle=True, random_state=None)

        Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the prevalence values are not specified, then returns the index of a uniform sampling. For each class, the sampling is drawn without replacement if the requested prevalence is larger than @@ -425,6 +443,7 @@ the actual prevalence of the class, or with replacement otherwise.

        it is constrained. E.g., for binary collections, only the prevalence p for the first class (as listed in self.classes_ can be specified, while the other class takes prevalence value 1-p

      • shuffle – if set to True (default), shuffles the index before returning it

      • +
      • random_state – seed for reproducing sampling

      Returns:
      diff --git a/docs/build/html/quapy.html b/docs/build/html/quapy.html index 682e83f..d72b33d 100644 --- a/docs/build/html/quapy.html +++ b/docs/build/html/quapy.html @@ -502,7 +502,7 @@ will be taken from the environment variable SAMPLE_SIZE (which has
      -class quapy.protocol.APP(data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=None, return_type='sample_prev')
      +class quapy.protocol.APP(data: LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, smooth_limits_epsilon=0, random_state=0, return_type='sample_prev')

      Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

      Implementation of the artificial prevalence protocol (APP). The APP consists of exploring a grid of prevalence values containing n_prevalences points (e.g., @@ -520,7 +520,8 @@ qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is rai grid (default is 21)

    • repeats – number of copies for each valid prevalence vector (default is 10)

    • smooth_limits_epsilon – the quantity to add and subtract to the limits 0 and 1

    • -
    • random_state – allows replicating samples across runs (default None)

    • +
    • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples +will be the same every time the protocol is called)

    • return_type – set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or to “labelled_collection” to get instead instances of LabelledCollection

    @@ -604,7 +605,7 @@ in the grid multiplied by repeat

    -class quapy.protocol.AbstractStochasticSeededProtocol(random_state=None)
    +class quapy.protocol.AbstractStochasticSeededProtocol(random_state=0)

    Bases: AbstractProtocol

    An AbstractStochasticSeededProtocol is a protocol that generates, via any random procedure (e.g., via random sampling), sequences of quapy.data.base.LabelledCollection samples. @@ -616,8 +617,8 @@ needed for extracting the samples, and

    Parameters:
    -

    random_state – the seed for allowing to replicate any sequence of samples. Default is None, meaning that -the sequence will be different every time the protocol is called.

    +

    random_state – the seed for allowing to replicate any sequence of samples. Default is 0, meaning that +the sequence will be consistent every time the protocol is called.

    @@ -659,7 +660,7 @@ the sequence will be different every time the protocol is called.

    -class quapy.protocol.DomainMixer(domainA: LabelledCollection, domainB: LabelledCollection, sample_size, repeats=1, prevalence=None, mixture_points=11, random_state=None, return_type='sample_prev')
    +class quapy.protocol.DomainMixer(domainA: LabelledCollection, domainB: LabelledCollection, sample_size, repeats=1, prevalence=None, mixture_points=11, random_state=0, return_type='sample_prev')

    Bases: AbstractStochasticSeededProtocol

    Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.

    @@ -675,7 +676,8 @@ will be taken from the domain A (default).

  • mixture_points – an integer indicating the number of points to take from a linear scale (e.g., 21 will generate the mixture points [1, 0.95, 0.9, …, 0]), or the array of mixture values itself. the specific points

  • -
  • random_state

  • +
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples +will be the same every time the protocol is called)

  • @@ -719,7 +721,7 @@ the specific points

    -class quapy.protocol.NPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev')
    +class quapy.protocol.NPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')

    Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

    A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.

    @@ -730,7 +732,8 @@ samples uniformly at random, therefore approximately preserving the natural prev
  • sample_size – integer, the number of instances in each sample; if None (default) then it is taken from qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is raised.

  • repeats – the number of samples to generate. Default is 100.

  • -
  • random_state – allows replicating samples across runs (default None)

  • +
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples +will be the same every time the protocol is called)

  • return_type – set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or to “labelled_collection” to get instead instances of LabelledCollection

  • @@ -802,7 +805,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

    <
    -class quapy.protocol.USimplexPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev')
    +class quapy.protocol.USimplexPP(data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev')

    Bases: AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol

    A variant of APP that, instead of using a grid of equidistant prevalence values, relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with @@ -817,7 +820,8 @@ combinations of the grid values of APP makes this endeavour intractable.

  • sample_size – integer, the number of instances in each sample; if None (default) then it is taken from qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is raised.

  • repeats – the number of samples to generate. Default is 100.

  • -
  • random_state – allows replicating samples across runs (default None)

  • +
  • random_state – allows replicating samples across runs (default 0, meaning that the sequence of samples +will be the same every time the protocol is called)

  • return_type – set to “sample_prev” (default) to get the pairs of (sample, prevalence) at each iteration, or to “labelled_collection” to get instead instances of LabelledCollection
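For instance (a minimal sketch, assuming a LabelledCollection named test as in the APP example earlier; sample_size and repeats are arbitrary):

    from quapy.protocol import USimplexPP

    protocol = USimplexPP(test, sample_size=100, repeats=50, random_state=0)
    for sample, prev in protocol():
        ...  # each prev is drawn uniformly at random from the unit (n_classes-1)-simplex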

  • diff --git a/docs/build/html/quapy.method.html b/docs/build/html/quapy.method.html index a3ba728..19a1e0b 100644 --- a/docs/build/html/quapy.method.html +++ b/docs/build/html/quapy.method.html @@ -781,9 +781,9 @@ validation data, or as an integer, indicating that the misclassification rates s
    -
    -class quapy.method.aggregative.OneVsAll(binary_quantifier, n_jobs=None)
    -

    Bases: AggregativeQuantifier

    +
    +class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +

    Bases: OneVsAllGeneric, AggregativeQuantifier

    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevelences sum up to 1. @@ -795,12 +795,15 @@ This variant was used, along with the -

    -aggregate(classif_predictions)
    +
    +aggregate(classif_predictions)

    Implements the aggregation of label predictions.

    Parameters:
    @@ -812,21 +815,9 @@ one-vs-all manner

    -
    -
    -property classes_
    -

    Class labels, in the same order in which class prevalence values are to be computed. -This default implementation actually returns the class labels of the learner.

    -
    -
    Returns:
    -

    array-like

    -
    -
    -
    -
    -
    -classify(instances)
    +
    +classify(instances)

    If the base quantifier is not probabilistic, returns a matrix of shape (n,m,) with n the number of instances and m the number of classes. The entry (i,j) is a binary value indicating whether instance i `belongs to class `j. The binary classifications are independent of each other, meaning that an instance @@ -845,63 +836,6 @@ probabilities are independent of each other, meaning that, in general, they do n

    -
    -
    -fit(data: LabelledCollection, fit_classifier=True)
    -

    Trains the aggregative quantifier

    -
    -
    Parameters:
    -
      -
    • data – a quapy.data.base.LabelledCollection consisting of the training data

    • -
    • fit_classifier – whether or not to train the learner (default is True). Set to False if the -learner has been trained outside the quantifier.

    • -
    -
    -
    Returns:
    -

    self

    -
    -
    -
    - -
    -
    -get_params(deep=True)
    -

    Get parameters for this estimator.

    -
    -
    Parameters:
    -

    deep (bool, default=True) – If True, will return the parameters for this estimator and -contained subobjects that are estimators.

    -
    -
    Returns:
    -

    params – Parameter names mapped to their values.

    -
    -
    Return type:
    -

    dict

    -
    -
    -
    - -
    -
    -set_params(**parameters)
    -

    Set the parameters of this estimator.

    -

    The method works on simple estimators as well as on nested objects -(such as Pipeline). The latter have -parameters of the form <component>__<parameter> so that it’s -possible to update each component of a nested object.

    -
    -
    Parameters:
    -

    **params (dict) – Estimator parameters.

    -
    -
    Returns:
    -

    self – Estimator instance.

    -
    -
    Return type:
    -

    estimator instance

    -
    -
    -
    -
    @@ -1362,38 +1296,57 @@ validation data, or as an integer, indicating that the misclassification rates s
    -
    -class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None)
    +
    +class quapy.method.base.OneVsAll

    Bases: object

    +
    + +
    +
    +class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +

    Bases: OneVsAll, BaseQuantifier

    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.

    -
    -property classes
    +
    +property classes_
    -fit(data: LabelledCollection, **kwargs)
    -
    - -
    -
    -get_params(deep=True)
    -
    +fit(data: LabelledCollection, fit_classifier=True) +

    Trains a quantifier.

    +
    +
    Parameters:
    +

    data – a quapy.data.base.LabelledCollection consisting of the training data

    +
    +
    Returns:
    +

    self

    +
    +
    +
    -quantify(X, *args)
    -
    - -
    -
    -set_params(**parameters)
    -
    +quantify(instances) +

    Generate class prevalence estimates for the sample’s instances

    +
    +
    Parameters:
    +

    instances – array-like

    +
    +
    Returns:
    +

    np.ndarray of shape (n_classes,) with class prevalence estimates.

    +
    +
    +
    +
    +
    +quapy.method.base.getOneVsAll(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +
    +
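As the CHANGE_LOG entry earlier in this series describes, the factory simply dispatches on the type of the wrapped quantifier. A minimal sketch (the quantifier choice is an assumption for illustration):

    from sklearn.linear_model import LogisticRegression
    from quapy.method.base import getOneVsAll
    from quapy.method.aggregative import PACC, OneVsAllAggregative

    ova = getOneVsAll(PACC(LogisticRegression()))
    assert isinstance(ova, OneVsAllAggregative)  # aggregative quantifiers get the classify/aggregate variant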

    quapy.method.meta

    diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 1f4419a..99c18d6 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["Datasets", "Evaluation", "Installation", "Methods", "Model-Selection", "Plotting", "index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["Datasets.md", "Evaluation.md", "Installation.rst", "Methods.md", "Model-Selection.md", "Plotting.md", "index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Datasets", "Evaluation", "Installation", "Quantification Methods", "Model Selection", "Plotting", "Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"quapi": [0, 1, 2, 3, 4, 5], "make": [0, 1, 3, 8, 11], "avail": [0, 1, 2, 3, 5, 6, 9, 11], "sever": 0, "have": [0, 1, 2, 3, 4, 5, 8, 10, 11], "been": [0, 3, 4, 5, 8, 9, 10, 11], "us": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "quantif": [0, 1, 6, 8, 9, 10, 11], "literatur": [0, 1, 4, 6], "well": [0, 3, 4, 5, 11], "an": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "interfac": [0, 1, 11], "allow": [0, 1, 2, 3, 5, 8, 9, 10, 11], "anyon": 0, "import": [0, 1, 3, 4, 5, 6, 10, 11], "A": [0, 3, 8, 9, 10, 11], "object": [0, 8, 9, 10, 11], "i": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "roughli": 0, "pair": [0, 8], "labelledcollect": [0, 3, 4, 8, 10, 11], "one": [0, 1, 3, 4, 5, 8, 10, 11], "plai": 0, "role": 0, "train": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "set": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "anoth": [0, 1, 3, 5], "test": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "class": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "consist": [0, 4, 5, 8, 9, 10, 11], "iter": [0, 8, 11], "instanc": [0, 3, 4, 5, 6, 8, 9, 10, 11], "label": [0, 3, 4, 5, 6, 8, 9, 10, 11], "thi": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "handl": 0, "most": [0, 3, 5, 6, 8, 10, 11], "sampl": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "function": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11], "take": [0, 3, 5, 8, 10, 11], "look": [0, 1, 3, 5, 11], "follow": [0, 1, 3, 4, 5, 6, 8, 11], "code": [0, 3, 4, 5, 9], "qp": [0, 1, 3, 4, 5, 6, 8, 10, 11], "f": [0, 1, 3, 4, 5, 6, 10], "1st": 0, "posit": [0, 3, 5, 8, 10, 11], "document": [0, 1, 3, 5, 9, 10, 11], "2nd": 0, "onli": [0, 3, 5, 8, 9, 10, 11], "neg": [0, 5, 8, 11], "neutral": 0, "3rd": 0, "2": [0, 1, 3, 5, 8, 10, 11], "0": [0, 1, 3, 4, 5, 8, 9, 10, 11], "1": [0, 1, 3, 4, 5, 8, 9, 10, 11], "print": [0, 1, 3, 4, 6, 9, 10], "strprev": [0, 1, 8], "preval": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "prec": [0, 8], "output": [0, 1, 3, 4, 9, 10, 11], "show": [0, 1, 3, 4, 5, 8, 9, 10, 11], "digit": 0, "precis": [0, 1, 8], "17": 0, "50": [0, 5, 8, 11], "33": [0, 5, 8], "One": [0, 1, 3, 11], "can": [0, 1, 2, 3, 4, 5, 8, 10, 11], "easili": [0, 2, 5, 9], "produc": [0, 1, 5, 8], "new": [0, 3, 8, 9, 10], "desir": [0, 1, 10], "sample_s": [0, 1, 3, 4, 5, 8, 11], "10": [0, 1, 4, 5, 8, 9, 11], "prev": [0, 1, 8, 10], "4": [0, 1, 3, 4, 5, 10, 11], "5": [0, 1, 3, 4, 5, 8, 9, 10, 11], "which": [0, 1, 3, 4, 5, 8, 9, 10, 11], "40": [0, 3, 4, 11], "made": [0, 2, 8, 10, 11], "across": [0, 1, 4, 5, 6, 8, 11], "differ": [0, 1, 3, 4, 5, 6, 8, 10, 11], "run": [0, 1, 2, 3, 4, 5, 8, 10, 11], "e": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "g": [0, 1, 3, 4, 6, 8, 10, 11], "method": [0, 1, 4, 5, 6, 8], "same": [0, 3, 5, 8, 10, 11], "exact": [0, 10], "retain": [0, 3, 9, 11], "index": [0, 3, 6, 8, 9, 10, 11], "gener": [0, 1, 3, 4, 5, 8, 
9, 10, 11], "sampling_index": [0, 10], "sampling_from_index": [0, 10], "also": [0, 1, 2, 3, 5, 6, 8, 9], "implement": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "artifici": [0, 1, 3, 4, 5, 6, 8], "protocol": [0, 3, 4, 5, 6, 7, 10, 11], "via": [0, 2, 3, 8, 9, 11], "python": [0, 6], "": [0, 1, 3, 4, 5, 8, 9, 10, 11], "seri": [0, 10], "equidist": [0, 8], "rang": [0, 5, 8, 11], "entir": [0, 3, 4, 5, 8], "spectrum": [0, 1, 4, 5, 8], "simplex": [0, 8], "space": [0, 4, 8, 9], "artificial_sampling_gener": 0, "100": [0, 1, 3, 4, 5, 8, 9, 10, 11], "n_preval": [0, 8], "each": [0, 1, 3, 4, 5, 8, 9, 10, 11], "valid": [0, 1, 3, 4, 5, 8, 9, 10, 11], "combin": [0, 1, 4, 8, 11], "origin": [0, 3, 8, 10], "from": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "split": [0, 3, 4, 5, 8, 9, 10, 11], "point": [0, 1, 3, 8, 10], "25": [0, 5, 8, 9, 11], "75": [0, 5, 8], "00": [0, 1, 4], "see": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "evalu": [0, 3, 4, 5, 6, 7, 9, 10, 11], "wiki": [0, 3], "further": [0, 1, 3, 9, 10, 11], "detail": [0, 1, 3, 6, 9, 10, 11], "how": [0, 1, 3, 4, 5, 8, 10, 11], "properli": 0, "three": [0, 5], "about": [0, 5, 8, 10], "kindl": [0, 1, 3, 5, 10, 11], "devic": [0, 3, 5, 9, 11], "harri": 0, "potter": 0, "known": [0, 3, 4, 8, 11], "imdb": [0, 5, 10], "movi": 0, "fetch": [0, 6], "unifi": [0, 11], "For": [0, 1, 5, 6, 8, 10], "exampl": [0, 1, 3, 4, 5, 8, 9, 10, 11], "fetch_review": [0, 1, 3, 4, 5, 10, 11], "These": [0, 9], "esuli": [0, 2, 3, 9, 10, 11], "moreo": [0, 3, 4, 10, 11], "sebastiani": [0, 3, 4, 10, 11], "2018": [0, 3, 10], "octob": [0, 3], "recurr": [0, 3, 10], "neural": [0, 8, 10], "network": [0, 8, 9, 10, 11], "In": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "proceed": [0, 3, 10], "27th": [0, 3, 10], "acm": [0, 3, 10, 11], "intern": [0, 3, 9, 10], "confer": [0, 3, 9, 10], "inform": [0, 1, 3, 4, 8, 9, 10, 11], "knowledg": [0, 3, 10], "manag": [0, 3, 10], "pp": [0, 3, 9], "1775": [0, 3], "1778": [0, 3], "The": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11], "list": [0, 5, 8, 9, 10, 11], "id": [0, 3, 10], "reviews_sentiment_dataset": [0, 10], "some": [0, 1, 3, 5, 8, 10, 11], "statist": [0, 1, 8, 11], "fhe": 0, "ar": [0, 1, 3, 4, 5, 8, 9, 10, 11], "summar": 0, "below": [0, 2, 3, 5, 8, 10], "size": [0, 1, 3, 8, 9, 10, 11], "type": [0, 3, 8, 10, 11], "hp": [0, 3, 4, 10], "9533": 0, "18399": 0, "018": 0, "982": 0, "065": 0, "935": 0, "text": [0, 3, 8, 9, 10, 11], "3821": [0, 10], "21591": [0, 10], "081": [0, 10], "919": [0, 10], "063": [0, 10], "937": [0, 10], "25000": 0, "500": [0, 1, 4, 5, 11], "11": [0, 1, 6, 8], "analysi": [0, 3, 6, 10], "access": [0, 3, 10, 11], "were": 0, "tf": [0, 10], "idf": 0, "format": [0, 5, 10, 11], "present": [0, 3, 10], "two": [0, 1, 3, 4, 5, 8, 10, 11], "val": [0, 9, 10], "model": [0, 1, 5, 6, 8, 9, 11], "select": [0, 3, 6, 8, 10, 11], "purpos": [0, 11], "exemplifi": 0, "load": [0, 3, 8, 10, 11], "fetch_twitt": [0, 3, 6, 10], "gasp": [0, 10], "for_model_select": [0, 10], "true": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "gao": [0, 3, 10, 11], "w": [0, 3, 10], "2015": [0, 2, 3, 9, 11], "august": 0, "tweet": [0, 3, 10], "classif": [0, 1, 3, 6, 8, 10, 11], "ieee": 0, "advanc": [0, 6], "social": [0, 3, 10], "mine": [0, 3], "asonam": 0, "97": 0, "104": 0, "semeval13": [0, 10], "semeval14": [0, 10], "semeval15": [0, 10], "share": [0, 10], "semev": 0, "mean": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "would": [0, 1, 3, 5, 6, 10, 11], "get": [0, 1, 5, 8, 9, 10, 11], "when": [0, 1, 3, 4, 5, 8, 9, 10], "request": [0, 8, 10, 11], "ani": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "them": [0, 3, 11], "consult": [0, 1], 
"twitter_sentiment_datasets_test": [0, 10], "9": [0, 1, 3, 5, 8], "replac": [0, 3, 10], "twitter_sentiment_datasets_train": [0, 10], "found": [0, 3, 4, 8, 9, 10], "featur": [0, 10], "3": [0, 1, 3, 5, 6, 8, 9, 10, 11], "8788": 0, "3765": 0, "694582": 0, "421": 0, "496": 0, "082": [0, 1], "407": 0, "507": 0, "086": 0, "spars": [0, 10], "hcr": [0, 3, 10], "1594": 0, "798": 0, "222046": 0, "546": 0, "211": 0, "243": 0, "640": 0, "167": 0, "193": 0, "omd": [0, 10], "1839": 0, "787": 0, "199151": 0, "463": 0, "271": 0, "266": 0, "437": 0, "283": [0, 1], "280": 0, "sander": [0, 10], "2155": 0, "923": 0, "229399": 0, "161": 0, "691": 0, "148": 0, "164": [0, 3], "688": 0, "11338": 0, "3813": 0, "1215742": 0, "159": 0, "470": 0, "372": 0, "158": 0, "430": 0, "412": 0, "1853": 0, "109": 0, "361": 0, "530": 0, "2390": 0, "153": 0, "413": 0, "434": 0, "semeval16": [0, 6, 10], "8000": 0, "2000": 0, "889504": 0, "157": 0, "351": 0, "492": 0, "163": [0, 1], "341": 0, "497": 0, "sst": [0, 10], "2971": 0, "1271": 0, "376132": 0, "261": 0, "452": 0, "288": 0, "207": 0, "481": 0, "312": 0, "wa": [0, 3, 5, 8, 10, 11], "2184": 0, "936": 0, "248563": 0, "305": 0, "414": 0, "281": 0, "282": 0, "446": 0, "272": 0, "wb": [0, 10], "4259": 0, "1823": 0, "404333": 0, "270": 0, "392": 0, "337": 0, "274": 0, "335": 0, "32": [0, 6], "repositori": [0, 10], "p\u00e9rez": [0, 3, 10, 11], "g\u00e1llego": [0, 3, 10, 11], "p": [0, 3, 8, 9, 10, 11], "quevedo": [0, 3, 10], "j": [0, 3, 10, 11], "r": [0, 3, 8, 10], "del": [0, 3, 10], "coz": [0, 3, 10], "2017": [0, 3, 10, 11], "ensembl": [0, 6, 10, 11], "problem": [0, 3, 5, 8, 10, 11], "characteriz": [0, 3, 10], "chang": [0, 1, 3, 10], "distribut": [0, 3, 5, 8, 10, 11], "case": [0, 1, 3, 4, 5, 8, 9, 10, 11], "studi": [0, 3, 10], "fusion": [0, 3, 10], "34": [0, 3, 10, 11], "87": [0, 3, 10], "doe": [0, 2, 3, 8, 11], "exactli": 0, "coincid": [0, 6], "et": [0, 2, 9, 10, 11], "al": [0, 2, 9, 10, 11], "sinc": [0, 1, 3, 5, 10, 11], "we": [0, 1, 3, 4, 5, 6, 10], "unabl": 0, "find": [0, 4, 11], "diabet": 0, "phonem": 0, "call": [0, 1, 5, 8, 10, 11], "fetch_ucidataset": [0, 3, 10], "yeast": [0, 10], "verbos": [0, 1, 4, 8, 9, 10, 11], "return": [0, 1, 3, 4, 5, 8, 9, 10, 11], "randomli": [0, 10], "drawn": [0, 1, 4, 8, 10], "stratifi": [0, 3, 9, 10, 11], "manner": [0, 9, 11], "whole": [0, 1, 3, 4, 8, 9], "collect": [0, 8, 9, 10], "70": 0, "30": [0, 1, 3, 11], "respect": [0, 1, 5, 8, 11], "option": [0, 1, 3, 5, 10, 11], "indic": [0, 1, 3, 4, 5, 8, 9, 10, 11], "descript": [0, 10], "should": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "standard": [0, 1, 5, 8, 9, 10, 11], "paper": [0, 3, 9, 11], "submit": 0, "kfcv": [0, 9, 10, 11], "order": [0, 2, 3, 5, 8, 10, 11], "accommod": 0, "practic": [0, 4], "could": [0, 1, 3, 4, 5, 6], "first": [0, 1, 2, 3, 5, 8, 10, 11], "instanti": [0, 1, 3, 4, 8, 9, 11], "creat": [0, 6, 8, 11], "time": [0, 1, 3, 8, 10], "fetch_ucilabelledcollect": [0, 10], "nfold": [0, 8, 10], "nrepeat": [0, 10], "abov": [0, 3, 5, 8], "conduct": [0, 8], "2x5fcv": 0, "all": [0, 1, 2, 3, 5, 8, 9, 11], "come": [0, 8, 10, 11], "numer": [0, 1, 3, 6, 10, 11], "form": [0, 8, 10, 11], "dens": [0, 11], "matric": [0, 5, 10], "acut": 0, "120": 0, "6": [0, 1, 3, 5, 10], "508": 0, "b": [0, 8, 10, 11], "583": 0, "417": 0, "balanc": [0, 4, 11], "625": 0, "539": 0, "461": 0, "922": 0, "078": 0, "breast": 0, "cancer": 0, "683": 0, "350": 0, "650": 0, "cmc": 0, "1473": 0, "573": 0, "427": 0, "774": 0, "226": 0, "653": 0, "347": 0, "ctg": 0, "2126": 0, "22": [0, 3, 9, 10], "222": [0, 9], "778": 0, "861": 0, "139": 
0, "917": 0, "083": 0, "german": 0, "1000": [0, 4, 11], "24": [0, 9], "300": [0, 1, 9], "700": 0, "haberman": [0, 3], "306": 0, "735": 0, "265": 0, "ionospher": 0, "641": 0, "359": 0, "iri": 0, "150": 0, "667": 0, "333": 0, "mammograph": 0, "830": 0, "514": 0, "486": 0, "pageblock": 0, "5473": 0, "979": 0, "021": 0, "semeion": 0, "1593": 0, "256": [0, 9], "901": 0, "099": 0, "sonar": 0, "208": 0, "60": 0, "534": 0, "466": 0, "spambas": 0, "4601": 0, "57": 0, "606": 0, "394": 0, "spectf": 0, "267": 0, "44": 0, "794": 0, "206": 0, "tictacto": 0, "958": 0, "transfus": 0, "748": 0, "762": 0, "238": 0, "wdbc": 0, "569": 0, "627": 0, "373": 0, "wine": 0, "178": 0, "13": [0, 9], "669": 0, "331": 0, "601": 0, "399": 0, "730": 0, "q": [0, 2, 3, 8, 9, 11], "red": 0, "1599": 0, "465": 0, "535": 0, "white": 0, "4898": 0, "665": 0, "1484": 0, "8": [0, 1, 5, 10, 11], "711": 0, "289": 0, "download": [0, 2, 3, 8, 10], "automat": [0, 1], "thei": [0, 3, 11], "store": [0, 9, 10, 11], "quapy_data": [0, 8], "folder": [0, 10, 11], "faster": [0, 10], "reus": [0, 3, 8, 10], "howev": [0, 4, 5], "requir": [0, 1, 3, 6, 9], "special": [0, 5, 10], "action": 0, "moment": [0, 3], "fulli": [0, 8], "autom": [0, 3, 6], "cardiotocographi": 0, "excel": 0, "file": [0, 5, 8, 9, 10, 11], "user": [0, 1, 5], "instal": [0, 3, 6, 9, 11], "xlrd": [0, 2], "modul": [0, 1, 3, 5, 6, 7], "open": [0, 6, 10], "page": [0, 2, 6], "block": [0, 8], "need": [0, 3, 8, 10, 11], "unix": 0, "compress": 0, "extens": [0, 2, 5], "z": [0, 10], "directli": [0, 1, 3], "doabl": 0, "packag": [0, 2, 3, 6, 7], "like": [0, 1, 3, 5, 8, 9, 10, 11], "gzip": 0, "zip": [0, 5], "uncompress": 0, "o": [0, 8], "depend": [0, 1, 4, 5, 8, 11], "softwar": 0, "manual": 0, "do": [0, 1, 3, 4, 8, 9, 10, 11], "invok": [0, 1, 3, 8, 10], "provid": [0, 3, 5, 6, 10, 11], "loader": [0, 10], "simpl": [0, 3, 5, 11], "deal": 0, "t": [0, 1, 3, 8, 9, 11], "pre": [0, 3], "n": [0, 1, 8, 9, 11], "second": [0, 1, 3, 5, 8, 10], "represent": [0, 3, 8, 9, 11], "col": [0, 10], "int": [0, 5, 8, 10, 11], "float": [0, 3, 8, 9, 10, 11], "charg": [0, 10], "classmethod": [0, 8, 10, 11], "def": [0, 1, 3, 5, 8], "cl": 0, "path": [0, 3, 5, 8, 9, 10, 11], "str": [0, 8, 10, 11], "loader_func": [0, 10], "callabl": [0, 8, 10, 11], "defin": [0, 3, 8, 9, 10, 11], "argument": [0, 1, 3, 5, 8, 10, 11], "initi": [0, 9, 11], "particular": [0, 1, 3, 11], "receiv": [0, 3, 5], "addition": 0, "number": [0, 1, 3, 5, 8, 9, 10, 11], "specifi": [0, 1, 3, 5, 8, 9, 10], "otherwis": [0, 3, 8, 10], "infer": [0, 10], "least": [0, 10], "pass": [0, 1, 5, 8, 9, 11], "along": [0, 3, 8, 11], "train_path": [0, 10], "my_data": 0, "dat": [0, 9], "test_path": [0, 10], "my_custom_load": 0, "rb": 0, "fin": 0, "preprocess": [0, 1, 3, 8, 11], "includ": [0, 1, 3, 5, 6, 10, 11], "text2tfidf": [0, 1, 3, 10], "tfidf": [0, 4, 5, 10], "vector": [0, 8, 9, 10, 11], "reduce_column": [0, 10], "reduc": [0, 10], "column": [0, 10], "base": [0, 3, 6, 8, 9], "term": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "frequenc": [0, 10, 11], "transform": [0, 9, 10, 11], "valu": [0, 1, 3, 8, 9, 10, 11], "score": [0, 1, 4, 8, 9, 10], "subtract": [0, 8, 10], "normal": [0, 1, 3, 8, 10, 11], "deviat": [0, 1, 5, 8, 10], "so": [0, 1, 3, 5, 8, 9, 10, 11], "zero": [0, 8], "unit": [0, 8], "varianc": [0, 5], "textual": [0, 6, 10], "token": [0, 9, 10], "appeal": 1, "tool": [1, 6], "scenario": [1, 3, 4, 5, 6], "dataset": [1, 3, 4, 5, 6, 8, 9, 11], "shift": [1, 4, 6, 8, 9, 11], "particularli": 1, "prior": [1, 3, 4, 5, 6, 8, 11], "probabl": [1, 3, 4, 5, 6, 8, 9, 11], "That": [1, 4], 
"interest": [1, 5, 6, 8], "estim": [1, 3, 5, 6, 8, 9, 10, 11], "aris": 1, "under": 1, "belief": 1, "those": [1, 3, 4, 5, 8, 9, 11], "might": [1, 8, 10], "ones": [1, 3, 5, 8, 10, 11], "observ": [1, 11], "dure": [1, 5, 11], "other": [1, 3, 5, 6, 8, 10, 11], "word": [1, 3, 6, 9, 10, 11], "simpli": [1, 2, 3, 4, 5, 6, 8, 11], "predictor": 1, "assum": [1, 6, 11], "unlik": [1, 4, 8], "machin": [1, 4, 6, 9], "learn": [1, 2, 3, 4, 6, 8, 9, 10, 11], "govern": 1, "iid": [1, 5, 6], "assumpt": [1, 5, 6], "brief": [1, 10], "dedic": [1, 10], "explain": [1, 5], "here": [1, 11], "mae": [1, 4, 6, 8, 9, 11], "absolut": [1, 3, 5, 6, 8, 11], "mrae": [1, 6, 8, 9, 11], "rel": [1, 3, 8, 10, 11], "mse": [1, 3, 6, 8, 11], "squar": [1, 3, 8], "mkld": [1, 8, 11], "kullback": [1, 3, 8, 11], "leibler": [1, 3, 8, 11], "diverg": [1, 3, 8, 11], "mnkld": [1, 8, 11], "ae": [1, 2, 5, 8, 11], "rae": [1, 2, 8, 11], "se": [1, 8], "kld": [1, 2, 8, 9, 11], "nkld": [1, 2, 6, 8, 9, 11], "individu": [1, 3], "without": [1, 3, 8, 10], "averag": [1, 3, 8, 10, 11], "acc": [1, 3, 5, 6, 8, 11], "accuraci": [1, 5, 8, 11], "f1e": [1, 8], "f1": [1, 8, 9], "true_prev": [1, 5, 8], "prevs_hat": [1, 8], "ndarrai": [1, 3, 8, 10, 11], "contain": [1, 2, 3, 5, 8, 9, 10, 11], "smooth": [1, 8], "stabil": [1, 11], "third": [1, 5], "ep": [1, 8], "none": [1, 4, 8, 9, 10, 11], "paramet": [1, 3, 4, 8, 9, 10, 11], "epsilon": [1, 8, 11], "tradition": 1, "2t": [1, 8], "past": 1, "either": [1, 3, 8, 11], "environ": [1, 3, 4, 5, 8, 11], "variabl": [1, 3, 5, 8, 10], "onc": [1, 3, 5, 8, 10], "ommit": 1, "thereaft": 1, "recommend": [1, 5, 11], "np": [1, 3, 4, 5, 8, 10, 11], "asarrai": 1, "let": [1, 3, 11], "estim_prev": [1, 5, 8], "ae_": 1, "3f": [1, 6], "200": [1, 9], "600": 1, "914": 1, "final": [1, 3, 5, 11], "possibl": [1, 3, 8, 11], "string": [1, 8, 10, 11], "error_funct": 1, "from_nam": [1, 8], "accord": [1, 3, 4, 8, 9, 10, 11], "fix": [1, 4], "cover": [1, 4, 8, 9], "full": [1, 8], "contrast": 1, "natur": [1, 8], "despit": 1, "introduc": 1, "approxim": [1, 5, 8, 9], "preserv": [1, 5, 8], "procol": 1, "equal": [1, 8, 11], "distant": [1, 8], "interv": [1, 5, 8], "n_prevpoint": [1, 4, 5, 8], "determin": [1, 4, 5, 8], "constrain": [1, 5, 8, 10], "obtain": [1, 4, 8, 9, 11], "66": [1, 11], "given": [1, 3, 4, 8, 9, 10, 11], "num_prevalence_combin": [1, 8], "21": [1, 3, 5, 8], "n_class": [1, 3, 8, 9, 10, 11], "n_repeat": [1, 8], "1771": 1, "note": [1, 3, 4, 5, 8, 10], "last": [1, 3, 5, 8, 9, 10], "typic": [1, 4, 5, 8, 9, 10, 11], "singl": [1, 3, 6, 11], "higher": [1, 5], "comput": [1, 3, 5, 8, 11], "perform": [1, 3, 4, 5, 6, 8, 9, 11], "signific": 1, "instead": [1, 3, 4, 8, 10, 11], "work": [1, 3, 4, 5, 8, 10, 11], "wai": [1, 11], "around": [1, 10], "maximum": [1, 8, 9, 11], "budg": 1, "close": [1, 10], "than": [1, 4, 5, 8, 9, 10], "budget": [1, 4], "achiev": [1, 3, 4, 5], "get_nprevpoints_approxim": [1, 8], "5000": [1, 5], "4960": 1, "cost": 1, "sometim": 1, "cumbersom": 1, "control": [1, 4, 8], "overal": 1, "experi": [1, 2, 3, 4, 5, 8], "rather": [1, 4], "By": [1, 3, 8], "avoid": [1, 8], "lead": [1, 10], "closer": 1, "surpass": 1, "script": [1, 2, 3, 6, 11], "pacc": [1, 3, 5, 8, 11], "reli": [1, 3, 8, 11], "logist": [1, 3, 9, 11], "regressor": [1, 3], "classifi": [1, 4, 5, 6, 8, 9, 11], "variou": [1, 5], "metric": [1, 3, 4, 6, 8, 11], "sklearn": [1, 3, 4, 5, 6, 9, 10, 11], "linear_model": [1, 3, 4, 6, 9], "logisticregress": [1, 3, 4, 6, 9, 11], "data": [1, 3, 4, 5, 6, 8, 9, 11], "min_df": [1, 3, 4, 5, 10, 11], "inplac": [1, 3, 10, 11], "lr": [1, 3, 9, 11], 
"aggreg": [1, 4, 5, 6, 8], "fit": [1, 3, 4, 5, 6, 8, 9, 10, 11], "df": 1, "artificial_sampling_report": 1, "mani": [1, 3, 4, 5, 6, 8, 10, 11], "extract": [1, 8, 10], "categori": [1, 8], "n_repetit": [1, 4, 5], "n_job": [1, 3, 4, 8, 9, 10, 11], "parallel": [1, 3, 8, 9, 10, 11], "worker": [1, 8, 9, 10, 11], "cpu": [1, 9, 11], "random_se": [1, 8], "42": 1, "random": [1, 3, 4, 5, 8, 10], "seed": [1, 4, 8], "replic": [1, 4, 8], "error_metr": [1, 4, 8], "line": [1, 3, 8], "result": [1, 2, 3, 4, 5, 6, 11], "report": 1, "panda": [1, 2], "datafram": 1, "displai": [1, 5, 8, 9], "just": [1, 3], "clearer": 1, "shown": [1, 5, 8], "convert": [1, 3, 8, 9, 10, 11], "repres": [1, 3, 5, 8, 10, 11], "decim": 1, "default": [1, 3, 8, 9, 10, 11], "pd": 1, "set_opt": 1, "expand_frame_repr": 1, "fals": [1, 3, 5, 8, 9, 10, 11], "map": [1, 9, 11], "000": 1, "000e": 1, "091": 1, "909": 1, "009": 1, "048": 1, "426e": 1, "04": 1, "837": 1, "037": 1, "114": 1, "633e": 1, "03": 1, "7": [1, 5, 8, 9, 11], "717": 1, "017": 1, "041": 1, "383e": 1, "366": 1, "634": 1, "034": 1, "070": 1, "412e": 1, "459": 1, "541": 1, "387e": 1, "565": 1, "435": 1, "035": 1, "073": 1, "535e": 1, "654": 1, "346": 1, "046": 1, "108": 1, "701e": 1, "725": 1, "275": 1, "075": 1, "235": 1, "515e": 1, "02": 1, "858": 1, "142": 1, "042": 1, "229": 1, "740e": 1, "945": 1, "055": 1, "27": [1, 3, 9], "357": 1, "219e": 1, "578": 1, "dtype": [1, 10], "float64": 1, "artificial_sampling_ev": [1, 4], "artificial_sampling_predict": [1, 5], "arrai": [1, 3, 5, 8, 9, 10, 11], "pip": 2, "older": 2, "version": [2, 8, 9, 11], "scikit": [2, 3, 4, 8, 9, 10, 11], "numpi": [2, 4, 8, 9], "scipi": [2, 10], "pytorch": [2, 11], "quanet": [2, 6, 9, 11], "svmperf": [2, 3, 8, 11], "patch": [2, 3, 9, 11], "joblib": 2, "tqdm": 2, "matplotlib": [2, 8], "involv": [2, 5, 8], "you": [2, 3], "appli": [2, 3, 4, 5, 8, 9, 10, 11], "ext": 2, "compil": [2, 3], "sourc": [2, 3, 6, 9], "prepare_svmperf": [2, 3], "sh": [2, 3], "job": 2, "directori": [2, 8, 9, 10, 11], "svm_perf_quantif": [2, 3], "optim": [2, 3, 4, 8, 9, 11], "measur": [2, 3, 4, 5, 6, 8, 11], "propos": [2, 3, 11], "barranquero": [2, 3, 9, 11], "extend": [2, 3, 8, 11], "former": [2, 11], "categor": [3, 10], "belong": [3, 11], "non": [3, 11], "group": 3, "though": [3, 8], "plan": 3, "add": [3, 4, 8, 10], "more": [3, 5, 11], "futur": 3, "character": [3, 6], "fact": [3, 5], "product": [3, 10], "quantifi": [3, 4, 5, 6, 8, 10, 11], "shoud": 3, "basequantifi": [3, 8, 11], "abstract": [3, 8, 9, 10, 11], "abstractmethod": 3, "self": [3, 8, 9, 10, 11], "set_param": [3, 8, 9, 11], "get_param": [3, 8, 9, 11], "deep": [3, 8, 11], "familiar": 3, "structur": [3, 11], "inspir": 3, "reason": [3, 5, 6], "why": 3, "ha": [3, 4, 5, 8, 9, 10, 11], "adopt": [3, 4, 10], "respond": 3, "predict": [3, 4, 5, 8, 9, 11], "input": [3, 5, 8, 9, 11], "element": [3, 10, 11], "while": [3, 5, 9, 10, 11], "selector": 3, "process": [3, 4, 8], "hyperparamet": [3, 8, 11], "search": [3, 4, 6, 8, 11], "part": [3, 10], "aggregativequantifi": [3, 11], "must": [3, 10, 11], "fit_learn": 3, "classif_predict": [3, 11], "mention": 3, "befor": [3, 8, 9, 10, 11], "inde": [3, 4], "alreadi": [3, 8, 11], "preclassifi": 3, "maintain": [3, 11], "through": [3, 8], "properti": [3, 8, 9, 10, 11], "learner": [3, 4, 9, 11], "extern": 3, "probabilist": [3, 9, 11], "inherit": 3, "aggregativeprobabilisticquantifi": [3, 11], "posterior": [3, 8, 9, 11], "crisp": [3, 8, 11], "decis": [3, 8, 9, 11], "hard": [3, 9], "classif_posterior": [3, 11], "posterior_prob": [3, 11], "advantag": [3, 
11], "procedur": [3, 6, 8], "veri": [3, 5], "effici": 3, "everi": [3, 8, 11], "leverag": 3, "speed": [3, 11], "up": [3, 4, 8, 9, 11], "over": [3, 4, 8], "customarili": [3, 4], "done": 3, "four": 3, "cc": [3, 5, 11], "simplest": 3, "deliv": [3, 11], "adjust": [3, 6, 8, 11], "pcc": [3, 4, 5, 11], "soft": 3, "serv": [3, 8, 10], "complet": [3, 5, 11], "equip": [3, 5], "svm": [3, 5, 6, 9, 10, 11], "linearsvc": [3, 5, 10], "pickl": [3, 8, 10, 11], "alia": [3, 8, 10, 11], "classifyandcount": [3, 11], "estim_preval": [3, 6, 11], "rate": [3, 8, 9, 11], "binari": [3, 5, 6, 8, 9, 10, 11], "init": 3, "addit": 3, "val_split": [3, 4, 9, 11], "integ": [3, 8, 9, 10, 11], "k": [3, 6, 8, 9, 10, 11], "fold": [3, 8, 10, 11], "cross": [3, 8, 9, 10, 11], "specif": [3, 4, 8], "held": [3, 4, 8, 9, 11], "out": [3, 4, 5, 8, 9, 10, 11], "postpon": 3, "constructor": 3, "prevail": 3, "overrid": 3, "illustr": [3, 4, 5], "seem": 3, "calibr": [3, 8], "calibratedclassifiercv": 3, "base_estim": 3, "cv": [3, 4], "predict_proba": [3, 9, 11], "As": [3, 4], "calibratedclassifi": 3, "except": [3, 8, 11], "rais": [3, 8, 11], "lastli": 3, "everyth": 3, "said": 3, "aboud": 3, "sld": [3, 11], "expectationmaximizationquantifi": [3, 11], "describ": [3, 8, 11], "saeren": [3, 11], "m": [3, 8, 11], "latinn": [3, 11], "decaesteck": [3, 11], "c": [3, 4, 8, 9, 10, 11], "2002": 3, "priori": 3, "14": 3, "41": 3, "attempt": [3, 11], "although": [3, 4, 5, 11], "improv": [3, 8, 9, 11], "rank": [3, 9], "almost": 3, "alwai": [3, 4, 5, 11], "among": 3, "effect": 3, "carri": [3, 10, 11], "gonz\u00e1lez": 3, "castro": 3, "v": [3, 8, 9, 11], "alaiz": 3, "rodr\u0131": 3, "guez": 3, "alegr": 3, "2013": 3, "scienc": 3, "218": 3, "146": 3, "It": [3, 4, 5, 8], "allia": 3, "hellingerdistancei": [3, 11], "mixtur": [3, 8, 11], "previou": 3, "overridden": [3, 11], "proport": [3, 4, 9, 10, 11], "taken": [3, 8, 9, 10], "itself": [3, 8, 11], "accept": 3, "elm": [3, 11], "famili": [3, 11], "target": [3, 5, 6, 8, 9, 11], "orient": [3, 6, 8, 11], "joachim": [3, 9, 11], "svmq": [3, 11], "d\u00edez": 3, "reliabl": 3, "pattern": 3, "recognit": 3, "48": 3, "591": 3, "604": 3, "svmkld": [3, 11], "multivari": [3, 9], "transact": 3, "discoveri": 3, "articl": [3, 4], "svmnkld": [3, 11], "svmae": [3, 11], "error": [3, 4, 6, 7, 9, 11], "svmrae": [3, 11], "what": 3, "nowadai": 3, "consid": [3, 5, 8, 9, 10, 11], "behav": [3, 5], "If": [3, 5, 8, 10, 11], "want": [3, 4], "custom": [3, 6, 10], "modifi": [3, 8], "assign": [3, 10], "Then": 3, "re": [3, 4, 9, 10], "thing": 3, "your": 3, "svmperf_hom": 3, "valid_loss": [3, 9, 11], "mycustomloss": 3, "28": [3, 10], "current": [3, 8, 9, 10, 11], "support": [3, 6, 9, 10, 11], "oper": 3, "trivial": 3, "strategi": [3, 4], "2016": [3, 10, 11], "sentiment": [3, 6, 10], "19": [3, 10], "onevsal": [3, 11], "know": 3, "where": [3, 5, 8, 9, 10, 11], "top": [3, 8, 11], "thu": [3, 4, 5, 8, 9, 11], "nor": 3, "castano": [3, 10], "2019": [3, 10, 11], "dynam": [3, 9, 10, 11], "task": [3, 4, 10], "45": [3, 5, 10], "15": [3, 8, 10], "polici": [3, 11], "processor": 3, "av": [3, 11], "ptr": [3, 11], "member": [3, 11], "d": [3, 11], "static": [3, 11], "red_siz": [3, 11], "pleas": 3, "check": [3, 4, 8], "offer": [3, 6], "torch": [3, 9, 11], "embed": [3, 9, 11], "lstm": [3, 9, 11], "cnn": [3, 11], "its": [3, 4, 8, 9, 11], "layer": [3, 9, 11], "neuralclassifiertrain": [3, 9, 11], "cnnnet": [3, 9, 11], "vocabulary_s": [3, 9, 10, 11], "cuda": [3, 9, 11], "supervis": [4, 6], "strongli": [4, 5], "good": [4, 5], "choic": [4, 11], "hyper": [4, 8, 9], "wherebi": 4, 
"chosen": [4, 8], "pick": 4, "best": [4, 8, 9, 11], "being": [4, 8, 11], "criteria": 4, "solv": [4, 11], "assess": 4, "own": 4, "right": [4, 8, 10], "impos": [4, 8], "aim": [4, 5], "appropri": 4, "configur": [4, 8], "design": 4, "long": [4, 9], "regard": 4, "next": [4, 8, 9, 10], "section": 4, "argu": 4, "alejandro": 4, "fabrizio": 4, "count": [4, 5, 6, 8, 10, 11], "arxiv": 4, "preprint": 4, "2011": 4, "02552": 4, "2020": [4, 9], "varieti": 4, "exhibit": [4, 5], "degre": 4, "model_select": [4, 7, 11], "gridsearchq": [4, 8, 11], "grid": [4, 8, 11], "explor": [4, 8], "portion": 4, "param_grid": [4, 8, 11], "logspac": [4, 11], "class_weight": [4, 11], "eval_budget": 4, "refit": [4, 8], "retrain": [4, 9], "goe": 4, "end": [4, 8, 11], "best_params_": 4, "best_model_": 4, "101": 4, "5f": 4, "system": [4, 11], "start": 4, "hyperparam": 4, "0001": [4, 11], "got": [4, 11], "24987": 4, "48135": 4, "001": [4, 9, 11], "24866": 4, "100000": 4, "43676": 4, "finish": 4, "param": [4, 8, 9, 11], "19982": 4, "develop": [4, 6], "1010": 4, "5005": 4, "54it": 4, "20342": 4, "altern": 4, "computation": 4, "costli": 4, "try": 4, "theoret": 4, "suboptim": 4, "opt": 4, "gridsearchcv": [4, 11], "10000": 4, "5379": 4, "55it": 4, "41734": 4, "wors": [4, 5, 8], "larg": 4, "between": [4, 5, 6, 8, 9, 11], "modal": 4, "turn": 4, "better": 4, "nonetheless": 4, "happen": [4, 5], "basic": [5, 11], "help": 5, "analys": [5, 6], "outcom": 5, "main": 5, "method_nam": [5, 8, 11], "name": [5, 8, 9, 10, 11], "shape": [5, 8, 9, 10, 11], "correspond": [5, 10], "matrix": [5, 8, 11], "appear": 5, "occur": [5, 10], "merg": 5, "emq": [5, 11], "55": 5, "showcas": 5, "wide": 5, "variant": [5, 6, 8, 11], "linear": [5, 8, 11], "review": [5, 6, 10], "step": [5, 8], "05": [5, 8, 11], "gen_data": 5, "base_classifi": 5, "yield": [5, 8, 10, 11], "tr_prev": [5, 8, 11], "append": 5, "__class__": 5, "__name__": 5, "insight": 5, "view": 5, "y": [5, 8, 9, 10, 11], "axi": [5, 8], "against": 5, "x": [5, 8, 9, 10, 11], "unfortun": 5, "limit": [5, 8, 11], "binary_diagon": [5, 8], "train_prev": [5, 8], "savepath": [5, 8], "bin_diag": 5, "png": 5, "save": [5, 8], "pdf": [5, 11], "cyan": 5, "dot": [5, 8], "color": [5, 8], "band": [5, 8], "hidden": [5, 9, 11], "show_std": [5, 8], "unadjust": 5, "bias": 5, "toward": [5, 10], "seen": [5, 8, 11], "evinc": 5, "box": [5, 8], "binary_bias_glob": [5, 8], "bin_bia": 5, "unbias": 5, "center": 5, "tend": 5, "overestim": 5, "high": [5, 8], "lower": [5, 11], "again": 5, "accordingli": 5, "20": [5, 8, 11], "90": [5, 8], "rewrit": 5, "method_data": 5, "training_preval": 5, "linspac": 5, "training_s": 5, "suffic": 5, "latex": 5, "syntax": 5, "_": [5, 8, 10], "now": 5, "clearli": 5, "binary_bias_bin": [5, 8], "broken": [5, 8], "down": [5, 8, 10], "bin": [5, 8, 11], "To": [5, 10], "nbin": [5, 8, 11], "isometr": [5, 8], "subinterv": 5, "interestingli": 5, "enough": 5, "seemingli": 5, "tendenc": 5, "low": [5, 8, 9], "underestim": 5, "beyond": 5, "67": [5, 8], "curios": 5, "pretti": 5, "discuss": 5, "analyz": 5, "compar": [5, 8], "both": 5, "irrespect": [5, 11], "harder": 5, "interpret": [5, 6, 11], "error_by_drift": [5, 8], "error_nam": [5, 8], "n_bin": [5, 8, 11], "err_drift": 5, "whenev": [5, 8], "clear": 5, "lowest": 5, "difficult": 5, "rememb": 5, "solid": 5, "comparison": 5, "detriment": 5, "visual": [5, 6], "hide": 5, "framework": [6, 11], "written": 6, "root": 6, "concept": 6, "baselin": 6, "integr": 6, "commonli": 6, "facilit": 6, "twitter": [6, 10], "true_preval": 6, "hold": [6, 8, 11], "endeavour": [6, 8], "popular": 
6, "expect": [6, 11], "maxim": [6, 11], "hdy": [6, 11], "versatil": 6, "etc": 6, "uci": [6, 10], "nativ": 6, "loss": [6, 9, 11], "perf": [6, 9, 11], "ad": 6, "meta": [6, 8], "plot": [6, 7], "diagon": [6, 8], "bia": [6, 8, 9, 11], "drift": 6, "api": 6, "subpackag": 7, "submodul": 7, "util": [7, 9], "content": 7, "bctscalibr": 9, "nbvscalibr": 9, "recalibratedprobabilisticclassifi": 9, "recalibratedprobabilisticclassifierbas": 9, "classes_": [9, 10, 11], "fit_cv": 9, "fit_tr_val": 9, "tscalibr": 9, "vscalibr": 9, "lowranklogisticregress": 9, "document_embed": 9, "lstmnet": 9, "reset_net_param": 9, "textclassifiernet": 9, "dimens": [8, 9, 10, 11], "forward": [9, 11], "xavier_uniform": 9, "torchdataset": 9, "asdataload": 9, "decision_funct": 9, "splitstratifi": 10, "stat": 10, "train_test": 10, "xp": 10, "xy": 10, "split_random": 10, "split_stratifi": 10, "uniform_sampl": 10, "uniform_sampling_index": 10, "fetch_lequa2022": 10, "warn": 10, "indextransform": 10, "add_word": 10, "fit_transform": 10, "reader": 8, "binar": [8, 10], "from_csv": 10, "from_spars": 10, "from_text": 10, "reindex_label": 10, "getptecondestim": 11, "solve_adjust": 11, "adjustedclassifyandcount": 11, "distributionmatch": 11, "dy": 11, "em": 11, "max_it": 11, "explicitlossminimis": 11, "max": 11, "ms2": 11, "mediansweep": 11, "mediansweep2": 11, "probabilisticadjustedclassifyandcount": 11, "probabilisticclassifyandcount": 11, "smm": 11, "t50": 11, "thresholdoptim": 11, "cross_generate_predict": 11, "cross_generate_predictions_depr": 11, "binaryquantifi": 11, "onevsallgener": 11, "eacc": 11, "ecc": 11, "eemq": 11, "ehdi": 11, "epacc": 11, "valid_polici": 11, "ensemblefactori": 11, "get_probability_distribut": 11, "quanetmodul": 11, "quanettrain": 11, "clean_checkpoint": 11, "clean_checkpoint_dir": 11, "mae_loss": 11, "non_aggreg": 8, "maximumlikelihoodprevalenceestim": 11, "absolute_error": 8, "hat": 8, "frac": 8, "mathcal": 8, "sum_": 8, "acc_error": 8, "y_true": 8, "y_pred": 8, "tp": 8, "tn": 8, "fp": 8, "fn": 8, "stand": [8, 11], "f1_error": 8, "macro": 8, "f_1": 8, "harmon": 8, "recal": 8, "2tp": 8, "independ": [8, 11], "err_nam": 8, "p_hat": 8, "d_": 8, "kl": 8, "log": [8, 10], "factor": 8, "beforehand": 8, "n_sampl": [8, 9], "mean_absolute_error": 8, "mean_relative_absolute_error": 8, "relative_absolute_error": 8, "underlin": 8, "displaystyl": 8, "abstractprotocol": 8, "union": [8, 11], "aggr_speedup": 8, "auto": 8, "evaluation_report": 8, "app": [8, 11], "repeat": 8, "smooth_limits_epsilon": 8, "random_st": [8, 10], "return_typ": 8, "sample_prev": 8, "abstractstochasticseededprotocol": 8, "onlabelledcollectionprotocol": 8, "95": 8, "copi": [8, 10], "quantiti": 8, "labelled_collect": 8, "prevalence_grid": 8, "exhaust": 8, "sum": [8, 11], "implicit": 8, "return_constrained_dim": 8, "rest": [8, 9, 10, 11], "quit": 8, "obvious": 8, "determinist": 8, "anywher": 8, "multipli": 8, "necessari": 8, "samples_paramet": 8, "total": 8, "parent": 8, "sequenc": 8, "enforc": 8, "collat": 8, "arg": [8, 10, 11], "domainmix": 8, "domaina": 8, "domainb": 8, "mixture_point": 8, "domain": 8, "scale": [8, 9, 11], "npp": 8, "draw": 8, "uniformli": 8, "therefor": 8, "get_col": 8, "get_labelled_collect": 8, "on_preclassified_inst": 8, "pre_classif": 8, "in_plac": 8, "usimplexpp": 8, "kraemer": 8, "algorithm": [8, 11], "sens": 8, "guarante": [8, 10], "prefer": 8, "intract": 8, "hellingerdist": 8, "hellingh": 8, "distanc": [8, 11], "hd": [8, 11], "discret": [8, 11], "sqrt": 8, "p_i": 8, "q_i": 8, "real": [8, 9, 10, 11], "topsoedist": 8, "1e": 
[8, 9, 11], "topso": [8, 11], "adjusted_quantif": 8, "prevalence_estim": 8, "tpr": [8, 11], "fpr": [8, 11], "clip": 8, "exce": 8, "check_prevalence_vector": 8, "raise_except": 8, "toleranz": 8, "08": 8, "combinations_budget": 8, "largest": 8, "dimension": [8, 9, 10, 11], "repetit": 8, "less": [8, 10], "normalize_preval": 8, "l1": [8, 11], "calcul": 8, "binom": 8, "mass": 8, "alloc": [8, 9], "solut": 8, "star": 8, "bar": 8, "prevalence_from_label": 8, "n_instanc": [8, 9, 11], "correctli": 8, "even": 8, "len": 8, "prevalence_from_prob": 8, "bool": [8, 9, 11], "argmax": 8, "prevalence_linspac": 8, "01": [8, 9, 11], "separ": [8, 10], "99": 8, "uniform_prevalence_sampl": 8, "adapt": [8, 9], "post": 8, "http": [8, 10, 11], "stackexchang": 8, "com": 8, "question": 8, "3227": 8, "uniform": [8, 10], "uniform_simplex_sampl": 8, "dict": [8, 10, 11], "timeout": 8, "dictionari": [8, 9, 10, 11], "kei": [8, 10], "quantification_error": 8, "whether": [8, 9, 10, 11], "ignor": [8, 10, 11], "gen": 8, "establish": 8, "timer": 8, "longer": 8, "timeouterror": 8, "bound": [8, 11], "stdout": 8, "best_model": 8, "after": [8, 11], "minim": [8, 11], "routin": [8, 10, 11], "unus": [8, 9], "contanin": 8, "cross_val_predict": 8, "akin": [8, 11], "issu": 8, "reproduc": [8, 10], "pos_class": [8, 10], "titl": 8, "colormap": 8, "listedcolormap": 8, "vertical_xtick": 8, "legend": 8, "local": 8, "sign": 8, "minu": 8, "classs": 8, "compon": [8, 9, 11], "cm": 8, "tab10": 8, "secondari": 8, "global": 8, "method_ord": 8, "henc": [8, 10], "conveni": 8, "multiclass": [8, 10, 11], "inconveni": 8, "leyend": 8, "hightlight": 8, "associ": [8, 10], "brokenbar_supremacy_by_drift": 8, "isomer": 8, "x_error": 8, "y_error": 8, "ttest_alpha": 8, "005": 8, "tail_density_threshold": 8, "region": 8, "chart": 8, "condit": [8, 11], "ii": 8, "significantli": 8, "side": 8, "confid": 8, "percentil": 8, "divid": 8, "amount": 8, "similar": [8, 11], "threshold": [8, 11], "densiti": 8, "tail": 8, "discard": 8, "outlier": 8, "show_dens": 8, "show_legend": 8, "logscal": 8, "vline": 8, "especi": 8, "mai": 8, "cumberson": 8, "gain": 8, "understand": 8, "fare": 8, "regim": 8, "highlight": 8, "vertic": 8, "earlystop": 8, "patienc": [8, 9, 11], "lower_is_bett": 8, "earli": [8, 9, 11], "stop": [8, 9, 11], "epoch": [8, 9, 11], "best_epoch": 8, "best_scor": 8, "consecut": [8, 9, 11], "monitor": 8, "obtaind": 8, "far": [8, 9, 10], "flag": 8, "keep": 8, "track": 8, "boolean": [8, 10, 11], "create_if_not_exist": 8, "makedir": 8, "exist_ok": 8, "join": 8, "dir": 8, "subdir": 8, "anotherdir": 8, "create_parent_dir": 8, "exist": 8, "txt": 8, "download_fil": 8, "url": 8, "archive_filenam": 8, "destin": 8, "filenam": 8, "download_file_if_not_exist": 8, "dowload": 8, "get_quapy_hom": 8, "home": [8, 10], "perman": 8, "map_parallel": 8, "func": 8, "slice": 8, "item": 8, "wrapper": [8, 9, 10, 11], "multiprocess": 8, "delai": 8, "args_i": 8, "silent": [8, 11], "child": 8, "ensur": 8, "pickled_resourc": 8, "pickle_path": 8, "generation_func": 8, "fast": [8, 10], "resourc": 8, "some_arrai": 8, "mock": [8, 9], "rand": 8, "my_arrai": 8, "pkl": 8, "save_text_fil": 8, "disk": 8, "miss": 8, "temp_se": 8, "context": 8, "tempor": 8, "outer": 8, "state": 8, "within": [8, 11], "get_njob": [], "correct": [9, 11], "temperatur": [9, 11], "bct": [9, 11], "abstent": 9, "alexandari": [9, 11], "afterward": [9, 11], "No": [9, 11], "nbv": [9, 11], "baseestim": [9, 11], "calibratorfactori": 9, "n_compon": 9, "kwarg": [9, 10, 11], "decomposit": 9, "truncatedsvd": 9, "princip": 9, "regress": 9, 
"n_featur": 9, "length": [9, 10], "eventu": [9, 10], "unalt": 9, "emb": 9, "embedding_s": 9, "hidden_s": 9, "repr_siz": 9, "kernel_height": 9, "stride": 9, "pad": [9, 10], "drop_p": 9, "convolut": 9, "vocabulari": [9, 10], "kernel": 9, "drop": 9, "dropout": [9, 11], "batch": 9, "dataload": 9, "tensor": 9, "n_dimens": 9, "lstm_class_nlay": 9, "short": 9, "memori": 9, "net": 9, "weight_decai": 9, "batch_siz": 9, "64": [9, 11], "batch_size_test": 9, "512": [9, 11], "padding_length": 9, "checkpointpath": 9, "checkpoint": [9, 11], "classifier_net": 9, "weight": [9, 10], "decai": 9, "wait": 9, "enabl": 9, "gpu": [9, 11], "vocab_s": 9, "reiniti": 9, "trainer": 9, "disjoint": 9, "embed_s": 9, "nn": 9, "pad_length": 9, "xavier": 9, "shuffl": [9, 10], "longest": 9, "shorter": 9, "svmperf_bas": [9, 11], "classifiermixin": 9, "thorsten": 9, "refer": [9, 10], "svm_perf_learn": 9, "svm_perf_classifi": 9, "trade": 9, "off": 9, "margin": 9, "std": 9, "qacc": 9, "qf1": 9, "qgm": 9, "12": 9, "26": 9, "23": 9, "train_siz": 10, "conform": 10, "round": 10, "loader_kwarg": 10, "read": 10, "tupl": [10, 11], "tr": 10, "te": 10, "csr": 10, "csr_matrix": 10, "4403": 10, "my_collect": 10, "codefram": 10, "larger": [10, 11], "actual": [10, 11], "empti": 10, "met": 10, "whose": [10, 11], "train_prop": 10, "left": [8, 10], "stratif": 10, "greater": 10, "dataset_nam": 10, "data_hom": 10, "test_split": 10, "predefin": 10, "uci_dataset": 10, "dump": 10, "leav": 10, "quay_data": 10, "ml": 10, "5fcvx2": 10, "x2": 10, "offici": 10, "lequa": 10, "competit": 10, "t1a": 10, "t1b": 10, "t2a": 10, "t2b": 10, "raw": 10, "merchandis": 10, "sperduti": 10, "2022": 10, "overview": 10, "clef": 10, "lequa2022_experi": 10, "py": 10, "guid": 10, "val_gen": 10, "test_gen": 10, "samplesfromdir": 10, "minimun": 10, "kept": 10, "subsequ": 10, "mining6": 10, "devel": 10, "style": 10, "countvector": 10, "keyword": [10, 11], "nogap": 10, "regardless": 10, "codifi": 10, "unknown": 10, "surfac": 10, "assert": 10, "gap": 10, "preced": 10, "decid": 10, "uniqu": 10, "rare": 10, "unk": 10, "minimum": [10, 11], "occurr": 10, "org": [10, 11], "stabl": 10, "feature_extract": 10, "html": 10, "subtyp": 10, "spmatrix": 10, "remov": [10, 11], "infrequ": 10, "aka": [10, 11], "sublinear_tf": 10, "scall": 10, "counter": 10, "tfidfvector": 10, "whcih": 10, "had": 10, "encod": 10, "utf": 10, "csv": 10, "feat1": 10, "feat2": 10, "featn": 10, "covari": 10, "express": 10, "row": 10, "class2int": 10, "collet": 10, "fomart": 10, "progress": 10, "sentenc": 10, "classnam": 10, "u1": 10, "misclassif": 11, "n_classes_": [], "fit_classifi": 11, "bypass": 11, "y_": 11, "ptecondestim": 11, "prevs_estim": 11, "ax": 11, "entri": 11, "y_i": 11, "y_j": 11, "_posterior_probabilities_": 11, "attribut": 11, "subclass": 11, "give": 11, "outsid": 11, "unless": 11, "noth": 11, "els": 11, "cdf": 11, "match": 11, "helling": 11, "sought": 11, "channel": 11, "proper": 11, "ch": 11, "di": 11, "dij": 11, "fraction": 11, "th": 11, "tol": 11, "ternari": 11, "dl": 11, "doi": 11, "1145": 11, "3219819": 11, "3220059": 11, "histogram": 11, "toler": 11, "explicit": 11, "exact_train_prev": 11, "recalib": 11, "updat": 11, "likelihood": [9, 11], "mutual": 11, "recurs": 11, "until": 11, "converg": 11, "suggest": 11, "recalibr": 11, "reach": 11, "loop": 11, "cumul": 11, "unlabel": 11, "latter": 11, "forman": 11, "2006": 11, "2008": 11, "goal": 11, "bring": 11, "denomin": 11, "median": 11, "sweep": 11, "binary_quantifi": 11, "prevel": 11, "emploi": 11, "resp": 11, "subobject": 11, "nest": 11, "pipelin": 
11, "__": 11, "simplif": 11, "2021": 11, "equival": 11, "cosest": 11, "heurist": 11, "choos": 11, "ground": 11, "complement": 11, "param_mod_sel": 11, "param_model_sel": 11, "min_po": 11, "max_sample_s": 11, "closest": 11, "preliminari": 11, "recomput": 11, "compat": 11, "l": 11, "base_quantifier_class": 11, "factori": 11, "common": 11, "doc_embedding_s": 11, "stats_siz": 11, "lstm_hidden_s": 11, "lstm_nlayer": 11, "ff_layer": 11, "1024": 11, "bidirect": 11, "qdrop_p": 11, "order_bi": 11, "cell": 11, "connect": 11, "ff": 11, "sort": 11, "doc_embed": 11, "doc_posterior": 11, "recip": 11, "care": 11, "regist": 11, "hook": 11, "n_epoch": 11, "tr_iter_per_poch": 11, "va_iter_per_poch": 11, "checkpointdir": 11, "checkpointnam": 11, "phase": 11, "anyth": 11, "truth": 11, "mlpe": 11, "lazi": 11, "put": 11, "assumpion": 11, "beat": [9, 11], "estimant": 11, "kundaj": 9, "shrikumar": 9, "novemb": 9, "232": 9, "pmlr": 9, "outpu": 9, "partit": 9, "ight": [], "valueerror": 8}, "objects": {"": [[8, 0, 0, "-", "quapy"]], "quapy": [[9, 0, 0, "-", "classification"], [10, 0, 0, "-", "data"], [8, 0, 0, "-", "error"], [8, 0, 0, "-", "evaluation"], [8, 0, 0, "-", "functional"], [11, 0, 0, "-", "method"], [8, 0, 0, "-", "model_selection"], [8, 0, 0, "-", "plot"], [8, 0, 0, "-", "protocol"], [8, 0, 0, "-", "util"]], "quapy.classification": [[9, 0, 0, "-", "calibration"], [9, 0, 0, "-", "methods"], [9, 0, 0, "-", "neural"], [9, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[9, 1, 1, "", "BCTSCalibration"], [9, 1, 1, "", "NBVSCalibration"], [9, 1, 1, "", "RecalibratedProbabilisticClassifier"], [9, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [9, 1, 1, "", "TSCalibration"], [9, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[9, 2, 1, "", "classes_"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "fit_cv"], [9, 3, 1, "", "fit_tr_val"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[9, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural": [[9, 1, 1, "", "CNNnet"], [9, 1, 1, "", "LSTMnet"], [9, 1, 1, "", "NeuralClassifierTrainer"], [9, 1, 1, "", "TextClassifierNet"], [9, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[9, 2, 1, "", "device"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "reset_net_params"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[9, 3, 1, "", "dimensions"], [9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "forward"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict_proba"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"], [9, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[9, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[9, 1, 1, "", "SVMperf"]], 
"quapy.classification.svmperf.SVMperf": [[9, 3, 1, "", "decision_function"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "set_params"], [9, 4, 1, "", "valid_losses"]], "quapy.data": [[10, 0, 0, "-", "base"], [10, 0, 0, "-", "datasets"], [10, 0, 0, "-", "preprocessing"], [10, 0, 0, "-", "reader"]], "quapy.data.base": [[10, 1, 1, "", "Dataset"], [10, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[10, 3, 1, "", "SplitStratified"], [10, 2, 1, "", "binary"], [10, 2, 1, "", "classes_"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 3, 1, "", "stats"], [10, 2, 1, "", "train_test"], [10, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[10, 2, 1, "", "X"], [10, 2, 1, "", "Xp"], [10, 2, 1, "", "Xy"], [10, 2, 1, "", "binary"], [10, 3, 1, "", "counts"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 2, 1, "", "p"], [10, 3, 1, "", "prevalence"], [10, 3, 1, "", "sampling"], [10, 3, 1, "", "sampling_from_index"], [10, 3, 1, "", "sampling_index"], [10, 3, 1, "", "split_random"], [10, 3, 1, "", "split_stratified"], [10, 3, 1, "", "stats"], [10, 3, 1, "", "uniform_sampling"], [10, 3, 1, "", "uniform_sampling_index"], [10, 2, 1, "", "y"]], "quapy.data.datasets": [[10, 5, 1, "", "fetch_UCIDataset"], [10, 5, 1, "", "fetch_UCILabelledCollection"], [10, 5, 1, "", "fetch_lequa2022"], [10, 5, 1, "", "fetch_reviews"], [10, 5, 1, "", "fetch_twitter"], [10, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[10, 1, 1, "", "IndexTransformer"], [10, 5, 1, "", "index"], [10, 5, 1, "", "reduce_columns"], [10, 5, 1, "", "standardize"], [10, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[10, 3, 1, "", "add_word"], [10, 3, 1, "", "fit"], [10, 3, 1, "", "fit_transform"], [10, 3, 1, "", "transform"], [10, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[10, 5, 1, "", "binarize"], [10, 5, 1, "", "from_csv"], [10, 5, 1, "", "from_sparse"], [10, 5, 1, "", "from_text"], [10, 5, 1, "", "reindex_labels"]], "quapy.error": [[8, 5, 1, "", "absolute_error"], [8, 5, 1, "", "acc_error"], [8, 5, 1, "", "acce"], [8, 5, 1, "", "ae"], [8, 5, 1, "", "f1_error"], [8, 5, 1, "", "f1e"], [8, 5, 1, "", "from_name"], [8, 5, 1, "", "kld"], [8, 5, 1, "", "mae"], [8, 5, 1, "", "mean_absolute_error"], [8, 5, 1, "", "mean_relative_absolute_error"], [8, 5, 1, "", "mkld"], [8, 5, 1, "", "mnkld"], [8, 5, 1, "", "mrae"], [8, 5, 1, "", "mse"], [8, 5, 1, "", "nkld"], [8, 5, 1, "", "rae"], [8, 5, 1, "", "relative_absolute_error"], [8, 5, 1, "", "se"], [8, 5, 1, "", "smooth"]], "quapy.evaluation": [[8, 5, 1, "", "evaluate"], [8, 5, 1, "", "evaluation_report"], [8, 5, 1, "", "prediction"]], "quapy.functional": [[8, 5, 1, "", "HellingerDistance"], [8, 5, 1, "", "TopsoeDistance"], [8, 5, 1, "", "adjusted_quantification"], [8, 5, 1, "", "check_prevalence_vector"], [8, 5, 1, "", "get_nprevpoints_approximation"], [8, 5, 1, "", "normalize_prevalence"], [8, 5, 1, "", "num_prevalence_combinations"], [8, 5, 1, "", "prevalence_from_labels"], [8, 5, 1, "", "prevalence_from_probabilities"], [8, 5, 1, "", "prevalence_linspace"], [8, 5, 1, "", "strprev"], [8, 5, 1, "", "uniform_prevalence_sampling"], [8, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[11, 0, 0, "-", "aggregative"], [11, 0, 0, "-", "base"], [11, 0, 0, "-", "meta"], [11, 0, 0, "-", "neural"], [11, 0, 0, "-", "non_aggregative"]], "quapy.method.aggregative": [[11, 1, 1, "", "ACC"], [11, 4, 1, "", "AdjustedClassifyAndCount"], [11, 1, 1, "", 
"AggregativeProbabilisticQuantifier"], [11, 1, 1, "", "AggregativeQuantifier"], [11, 1, 1, "", "CC"], [11, 4, 1, "", "ClassifyAndCount"], [11, 1, 1, "", "DistributionMatching"], [11, 1, 1, "", "DyS"], [11, 1, 1, "", "ELM"], [11, 1, 1, "", "EMQ"], [11, 4, 1, "", "ExpectationMaximizationQuantifier"], [11, 4, 1, "", "ExplicitLossMinimisation"], [11, 1, 1, "", "HDy"], [11, 4, 1, "", "HellingerDistanceY"], [11, 1, 1, "", "MAX"], [11, 1, 1, "", "MS"], [11, 1, 1, "", "MS2"], [11, 4, 1, "", "MedianSweep"], [11, 4, 1, "", "MedianSweep2"], [11, 1, 1, "", "OneVsAll"], [11, 1, 1, "", "PACC"], [11, 1, 1, "", "PCC"], [11, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [11, 4, 1, "", "ProbabilisticClassifyAndCount"], [11, 4, 1, "", "SLD"], [11, 1, 1, "", "SMM"], [11, 1, 1, "", "SVMAE"], [11, 1, 1, "", "SVMKLD"], [11, 1, 1, "", "SVMNKLD"], [11, 1, 1, "", "SVMQ"], [11, 1, 1, "", "SVMRAE"], [11, 1, 1, "", "T50"], [11, 1, 1, "", "ThresholdOptimization"], [11, 1, 1, "", "X"], [11, 5, 1, "", "cross_generate_predictions"], [11, 5, 1, "", "cross_generate_predictions_depr"]], "quapy.method.aggregative.ACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"], [11, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeProbabilisticQuantifier": [[11, 3, 1, "", "classify"]], "quapy.method.aggregative.AggregativeQuantifier": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 2, 1, "", "classifier"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.aggregative.CC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DistributionMatching": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DyS": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ELM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.EMQ": [[11, 3, 1, "", "EM"], [11, 4, 1, "", "EPSILON"], [11, 4, 1, "", "MAX_ITER"], [11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.OneVsAll": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "set_params"]], "quapy.method.aggregative.PACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.SMM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ThresholdOptimization": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.base": [[11, 1, 1, "", "BaseQuantifier"], [11, 1, 1, "", "BinaryQuantifier"], [11, 1, 1, "", "OneVsAllGeneric"]], "quapy.method.base.BaseQuantifier": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[11, 2, 1, "", "classes"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.meta": [[11, 5, 1, "", "EACC"], [11, 5, 1, "", "ECC"], [11, 5, 1, "", "EEMQ"], [11, 5, 1, "", "EHDy"], [11, 5, 1, "", "EPACC"], [11, 1, 1, "", "Ensemble"], [11, 5, 1, "", "ensembleFactory"], [11, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": 
[[11, 4, 1, "", "VALID_POLICIES"], [11, 2, 1, "", "aggregative"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 2, 1, "", "probabilistic"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.neural": [[11, 1, 1, "", "QuaNetModule"], [11, 1, 1, "", "QuaNetTrainer"], [11, 5, 1, "", "mae_loss"]], "quapy.method.neural.QuaNetModule": [[11, 2, 1, "", "device"], [11, 3, 1, "", "forward"], [11, 4, 1, "", "training"]], "quapy.method.neural.QuaNetTrainer": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "clean_checkpoint"], [11, 3, 1, "", "clean_checkpoint_dir"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[11, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.model_selection": [[8, 1, 1, "", "GridSearchQ"], [8, 5, 1, "", "cross_val_predict"]], "quapy.model_selection.GridSearchQ": [[8, 3, 1, "", "best_model"], [8, 3, 1, "", "fit"], [8, 3, 1, "", "get_params"], [8, 3, 1, "", "quantify"], [8, 3, 1, "", "set_params"]], "quapy.plot": [[8, 5, 1, "", "binary_bias_bins"], [8, 5, 1, "", "binary_bias_global"], [8, 5, 1, "", "binary_diagonal"], [8, 5, 1, "", "brokenbar_supremacy_by_drift"], [8, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[8, 1, 1, "", "APP"], [8, 1, 1, "", "AbstractProtocol"], [8, 1, 1, "", "AbstractStochasticSeededProtocol"], [8, 1, 1, "", "DomainMixer"], [8, 1, 1, "", "NPP"], [8, 1, 1, "", "OnLabelledCollectionProtocol"], [8, 1, 1, "", "USimplexPP"]], "quapy.protocol.APP": [[8, 3, 1, "", "prevalence_grid"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[8, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[8, 3, 1, "", "collator"], [8, 2, 1, "", "random_state"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.NPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[8, 4, 1, "", "RETURN_TYPES"], [8, 3, 1, "", "get_collator"], [8, 3, 1, "", "get_labelled_collection"], [8, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.USimplexPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.util": [[8, 1, 1, "", "EarlyStop"], [8, 5, 1, "", "create_if_not_exist"], [8, 5, 1, "", "create_parent_dir"], [8, 5, 1, "", "download_file"], [8, 5, 1, "", "download_file_if_not_exists"], [8, 5, 1, "", "get_quapy_home"], [8, 5, 1, "", "map_parallel"], [8, 5, 1, "", "parallel"], [8, 5, 1, "", "pickled_resource"], [8, 5, 1, "", "save_text_file"], [8, 5, 1, "", "temp_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"dataset": [0, 10], "review": 0, "twitter": 0, "sentiment": 0, "uci": 0, "machin": 0, "learn": 0, "issu": 0, "ad": 0, "custom": 0, "data": [0, 10], "process": 0, "evalu": [1, 8], "error": [1, 5, 8], 
"measur": 1, "protocol": [1, 8], "instal": 2, "requir": 2, "svm": 2, "perf": 2, "quantif": [2, 3, 4, 5], "orient": [2, 4], "loss": [2, 3, 4], "method": [3, 9, 11], "aggreg": [3, 11], "The": 3, "classifi": 3, "count": 3, "variant": 3, "expect": 3, "maxim": 3, "emq": 3, "helling": 3, "distanc": 3, "y": 3, "hdy": 3, "explicit": 3, "minim": 3, "meta": [3, 11], "model": [3, 4], "ensembl": 3, "quanet": 3, "neural": [3, 9, 11], "network": 3, "select": 4, "target": 4, "classif": [4, 9], "plot": [5, 8], "diagon": 5, "bia": 5, "drift": 5, "welcom": 6, "quapi": [6, 7, 8, 9, 10, 11], "": 6, "document": 6, "introduct": 6, "A": 6, "quick": 6, "exampl": 6, "featur": 6, "content": [6, 8, 9, 10, 11], "indic": 6, "tabl": 6, "packag": [8, 9, 10, 11], "subpackag": 8, "submodul": [8, 9, 10, 11], "function": 8, "model_select": 8, "util": 8, "modul": [8, 9, 10, 11], "calibr": 9, "svmperf": 9, "base": [10, 11], "preprocess": 10, "reader": 10, "non_aggreg": 11}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Datasets": [[0, "datasets"]], "Reviews Datasets": [[0, "reviews-datasets"]], "Twitter Sentiment Datasets": [[0, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[0, "uci-machine-learning"]], "Issues:": [[0, "issues"]], "Adding Custom Datasets": [[0, "adding-custom-datasets"]], "Data Processing": [[0, "data-processing"]], "Evaluation": [[1, "evaluation"]], "Error Measures": [[1, "error-measures"]], "Evaluation Protocols": [[1, "evaluation-protocols"]], "Installation": [[2, "installation"]], "Requirements": [[2, "requirements"]], "SVM-perf with quantification-oriented losses": [[2, "svm-perf-with-quantification-oriented-losses"]], "Quantification Methods": [[3, "quantification-methods"]], "Aggregative Methods": [[3, "aggregative-methods"]], "The Classify & Count variants": [[3, "the-classify-count-variants"]], "Expectation Maximization (EMQ)": [[3, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[3, "hellinger-distance-y-hdy"]], "Explicit Loss Minimization": [[3, "explicit-loss-minimization"]], "Meta Models": [[3, "meta-models"]], "Ensembles": [[3, "ensembles"]], "The QuaNet neural network": [[3, "the-quanet-neural-network"]], "Model Selection": [[4, "model-selection"]], "Targeting a Quantification-oriented loss": [[4, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[4, "targeting-a-classification-oriented-loss"]], "Plotting": [[5, "plotting"]], "Diagonal Plot": [[5, "diagonal-plot"]], "Quantification bias": [[5, "quantification-bias"]], "Error by Drift": [[5, "error-by-drift"]], "Welcome to QuaPy\u2019s documentation!": [[6, "welcome-to-quapy-s-documentation"]], "Introduction": [[6, "introduction"]], "A quick example:": [[6, "a-quick-example"]], "Features": [[6, "features"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "quapy": [[7, "quapy"]], "Submodules": [[9, "submodules"], [8, "submodules"], [10, "submodules"], [11, "submodules"]], "Module contents": [[9, "module-quapy.classification"], [8, "module-quapy"], [10, "module-quapy.data"], [11, "module-quapy.method"]], "quapy.classification package": [[9, "quapy-classification-package"]], "quapy.classification.calibration": [[9, "quapy-classification-calibration"]], "quapy.classification.methods": 
[[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "quapy package": [[8, "quapy-package"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.protocol": [[8, "quapy-protocol"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.util": [[8, "module-quapy.util"]], "Subpackages": [[8, "subpackages"]], "quapy.data package": [[10, "quapy-data-package"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "quapy.method package": [[11, "quapy-method-package"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[8, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol"]], "domainmixer (class in quapy.protocol)": [[8, "quapy.protocol.DomainMixer"]], "earlystop (class in quapy.util)": [[8, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[8, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[8, "quapy.functional.HellingerDistance"]], "npp (class in quapy.protocol)": [[8, "quapy.protocol.NPP"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[8, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "topsoedistance() (in module quapy.functional)": [[8, "quapy.functional.TopsoeDistance"]], "usimplexpp (class in quapy.protocol)": [[8, "quapy.protocol.USimplexPP"]], "absolute_error() (in module quapy.error)": [[8, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[8, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[8, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[8, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[8, "quapy.error.ae"]], "best_model() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[8, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[8, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[8, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, 
"quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], 
"prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, 
"quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], "fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, 
"quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "elm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ELM"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, 
"quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "explicitlossminimisation (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExplicitLossMinimisation"]], "hdy (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAll"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "svmae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMAE"]], "svmkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMKLD"]], "svmnkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMNKLD"]], "svmq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMQ"]], "svmrae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMRAE"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() 
(quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.aggregative.onevsall property)": [[11, "quapy.method.aggregative.OneVsAll.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() (quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.classify"]], "classify() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, 
"quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.get_params"]], "get_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.get_params"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() 
(quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.aggregative.onevsall method)": [[11, "quapy.method.aggregative.OneVsAll.set_params"]], "set_params() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.set_params"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["Datasets", "Evaluation", "Installation", "Methods", "Model-Selection", "Plotting", "index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["Datasets.md", "Evaluation.md", "Installation.rst", "Methods.md", "Model-Selection.md", "Plotting.md", "index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Datasets", "Evaluation", "Installation", "Quantification Methods", "Model Selection", "Plotting", "Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"quapi": [0, 1, 2, 3, 4, 5], "make": [0, 1, 3, 8, 11], "avail": [0, 1, 2, 3, 5, 6, 9, 11], "sever": [0, 10], "have": [0, 1, 2, 3, 4, 5, 8, 10, 11], "been": [0, 3, 4, 5, 8, 9, 10, 11], "us": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "quantif": [0, 1, 6, 8, 9, 10, 11], "literatur": [0, 1, 4, 6], "well": [0, 3, 4, 5, 11], "an": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "interfac": [0, 1, 11], "allow": [0, 1, 2, 3, 5, 8, 9, 10, 11], "anyon": 0, "import": [0, 1, 3, 4, 5, 6, 10, 11], "A": [0, 3, 8, 9, 10, 11], "object": [0, 8, 9, 10, 11], "i": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "roughli": 0, "pair": [0, 8], "labelledcollect": [0, 3, 4, 8, 10, 11], "one": [0, 1, 3, 4, 5, 8, 10, 11], "plai": 0, "role": 0, "train": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "set": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "anoth": [0, 1, 3, 5, 10], "test": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "class": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "consist": [0, 4, 5, 8, 9, 10, 11], "iter": [0, 8, 11], "instanc": [0, 3, 4, 5, 6, 8, 9, 10, 11], "label": [0, 3, 4, 5, 6, 8, 9, 10, 11], "thi": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "handl": 0, "most": [0, 3, 5, 6, 8, 10, 11], "sampl": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "function": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11], "take": [0, 3, 5, 8, 10, 11], "look": [0, 1, 3, 5, 11], "follow": [0, 1, 3, 4, 5, 6, 8, 11], "code": [0, 3, 4, 5, 9], "qp": [0, 1, 3, 4, 5, 6, 8, 10, 11], "f": [0, 1, 3, 4, 5, 6, 10], "1st": 0, "posit": [0, 3, 5, 8, 10, 11], "document": [0, 1, 3, 5, 9, 10, 11], "2nd": 0, "onli": [0, 3, 5, 8, 9, 10, 11], "neg": [0, 5, 8, 11], "neutral": 0, "3rd": 0, "2": [0, 1, 3, 5, 8, 10, 11], "0": [0, 1, 3, 4, 5, 8, 9, 
10, 11], "1": [0, 1, 3, 4, 5, 8, 9, 10, 11], "print": [0, 1, 3, 4, 6, 9, 10], "strprev": [0, 1, 8], "preval": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "prec": [0, 8], "output": [0, 1, 3, 4, 9, 10, 11], "show": [0, 1, 3, 4, 5, 8, 9, 10, 11], "digit": 0, "precis": [0, 1, 8], "17": 0, "50": [0, 5, 8, 11], "33": [0, 5, 8], "One": [0, 1, 3, 11], "can": [0, 1, 2, 3, 4, 5, 8, 10, 11], "easili": [0, 2, 5, 9], "produc": [0, 1, 5, 8], "new": [0, 3, 8, 9, 10], "desir": [0, 1, 10], "sample_s": [0, 1, 3, 4, 5, 8, 11], "10": [0, 1, 4, 5, 8, 9, 11], "prev": [0, 1, 8, 10], "4": [0, 1, 3, 4, 5, 10, 11], "5": [0, 1, 3, 4, 5, 8, 9, 10, 11], "which": [0, 1, 3, 4, 5, 8, 9, 10, 11], "40": [0, 3, 4, 11], "made": [0, 2, 8, 10, 11], "across": [0, 1, 4, 5, 6, 8, 11], "differ": [0, 1, 3, 4, 5, 6, 8, 10, 11], "run": [0, 1, 2, 3, 4, 5, 8, 10, 11], "e": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "g": [0, 1, 3, 4, 6, 8, 10, 11], "method": [0, 1, 4, 5, 6, 8], "same": [0, 3, 5, 8, 10, 11], "exact": [0, 10], "retain": [0, 3, 9, 11], "index": [0, 3, 6, 8, 9, 10, 11], "gener": [0, 1, 3, 4, 5, 8, 9, 10, 11], "sampling_index": [0, 10], "sampling_from_index": [0, 10], "also": [0, 1, 2, 3, 5, 6, 8, 9], "implement": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "artifici": [0, 1, 3, 4, 5, 6, 8], "protocol": [0, 3, 4, 5, 6, 7, 10, 11], "via": [0, 2, 3, 8, 9, 11], "python": [0, 6], "": [0, 1, 3, 4, 5, 8, 9, 10, 11], "seri": [0, 10], "equidist": [0, 8], "rang": [0, 5, 8, 11], "entir": [0, 3, 4, 5, 8], "spectrum": [0, 1, 4, 5, 8], "simplex": [0, 8], "space": [0, 4, 8, 9], "artificial_sampling_gener": 0, "100": [0, 1, 3, 4, 5, 8, 9, 10, 11], "n_preval": [0, 8], "each": [0, 1, 3, 4, 5, 8, 9, 10, 11], "valid": [0, 1, 3, 4, 5, 8, 9, 10, 11], "combin": [0, 1, 4, 8, 11], "origin": [0, 3, 8, 10], "from": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "split": [0, 3, 4, 5, 8, 9, 10, 11], "point": [0, 1, 3, 8, 10], "25": [0, 5, 8, 9, 11], "75": [0, 5, 8], "00": [0, 1, 4], "see": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "evalu": [0, 3, 4, 5, 6, 7, 9, 10, 11], "wiki": [0, 3], "further": [0, 1, 3, 9, 10, 11], "detail": [0, 1, 3, 6, 9, 10, 11], "how": [0, 1, 3, 4, 5, 8, 10, 11], "properli": 0, "three": [0, 5], "about": [0, 5, 8, 10], "kindl": [0, 1, 3, 5, 10, 11], "devic": [0, 3, 5, 9, 11], "harri": 0, "potter": 0, "known": [0, 3, 4, 8, 11], "imdb": [0, 5, 10], "movi": 0, "fetch": [0, 6], "unifi": [0, 11], "For": [0, 1, 5, 6, 8, 10], "exampl": [0, 1, 3, 4, 5, 8, 9, 10, 11], "fetch_review": [0, 1, 3, 4, 5, 10, 11], "These": [0, 9], "esuli": [0, 2, 3, 9, 10, 11], "moreo": [0, 3, 4, 10, 11], "sebastiani": [0, 3, 4, 10, 11], "2018": [0, 3, 10], "octob": [0, 3], "recurr": [0, 3, 10], "neural": [0, 8, 10], "network": [0, 8, 9, 10, 11], "In": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "proceed": [0, 3, 10], "27th": [0, 3, 10], "acm": [0, 3, 10, 11], "intern": [0, 3, 9, 10], "confer": [0, 3, 9, 10], "inform": [0, 1, 3, 4, 8, 9, 10, 11], "knowledg": [0, 3, 10], "manag": [0, 3, 10], "pp": [0, 3, 9], "1775": [0, 3], "1778": [0, 3], "The": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11], "list": [0, 5, 8, 9, 10, 11], "id": [0, 3, 10], "reviews_sentiment_dataset": [0, 10], "some": [0, 1, 3, 5, 8, 10, 11], "statist": [0, 1, 8, 11], "fhe": 0, "ar": [0, 1, 3, 4, 5, 8, 9, 10, 11], "summar": 0, "below": [0, 2, 3, 5, 8, 10], "size": [0, 1, 3, 8, 9, 10, 11], "type": [0, 3, 8, 10, 11], "hp": [0, 3, 4, 10], "9533": 0, "18399": 0, "018": 0, "982": 0, "065": 0, "935": 0, "text": [0, 3, 8, 9, 10, 11], "3821": [0, 10], "21591": [0, 10], "081": [0, 10], "919": [0, 10], "063": [0, 10], "937": [0, 10], "25000": 0, "500": [0, 1, 4, 5, 11], 
"11": [0, 1, 6, 8], "analysi": [0, 3, 6, 10], "access": [0, 3, 10, 11], "were": 0, "tf": [0, 10], "idf": 0, "format": [0, 5, 10, 11], "present": [0, 3, 10], "two": [0, 1, 3, 4, 5, 8, 10, 11], "val": [0, 9, 10], "model": [0, 1, 5, 6, 8, 9, 11], "select": [0, 3, 6, 8, 10, 11], "purpos": [0, 11], "exemplifi": 0, "load": [0, 3, 8, 10, 11], "fetch_twitt": [0, 3, 6, 10], "gasp": [0, 10], "for_model_select": [0, 10], "true": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "gao": [0, 3, 10, 11], "w": [0, 3, 10], "2015": [0, 2, 3, 9, 11], "august": 0, "tweet": [0, 3, 10], "classif": [0, 1, 3, 6, 8, 10, 11], "ieee": 0, "advanc": [0, 6], "social": [0, 3, 10], "mine": [0, 3], "asonam": 0, "97": 0, "104": 0, "semeval13": [0, 10], "semeval14": [0, 10], "semeval15": [0, 10], "share": [0, 10], "semev": 0, "mean": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "would": [0, 1, 3, 5, 6, 10, 11], "get": [0, 1, 5, 8, 9, 10, 11], "when": [0, 1, 3, 4, 5, 8, 9, 10], "request": [0, 8, 10, 11], "ani": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "them": [0, 3, 10, 11], "consult": [0, 1], "twitter_sentiment_datasets_test": [0, 10], "9": [0, 1, 3, 5, 8], "replac": [0, 3, 10], "twitter_sentiment_datasets_train": [0, 10], "found": [0, 3, 4, 8, 9, 10], "featur": [0, 10], "3": [0, 1, 3, 5, 6, 8, 9, 10, 11], "8788": 0, "3765": 0, "694582": 0, "421": 0, "496": 0, "082": [0, 1], "407": 0, "507": 0, "086": 0, "spars": [0, 10], "hcr": [0, 3, 10], "1594": 0, "798": 0, "222046": 0, "546": 0, "211": 0, "243": 0, "640": 0, "167": 0, "193": 0, "omd": [0, 10], "1839": 0, "787": 0, "199151": 0, "463": 0, "271": 0, "266": 0, "437": 0, "283": [0, 1], "280": 0, "sander": [0, 10], "2155": 0, "923": 0, "229399": 0, "161": 0, "691": 0, "148": 0, "164": [0, 3], "688": 0, "11338": 0, "3813": 0, "1215742": 0, "159": 0, "470": 0, "372": 0, "158": 0, "430": 0, "412": 0, "1853": 0, "109": 0, "361": 0, "530": 0, "2390": 0, "153": 0, "413": 0, "434": 0, "semeval16": [0, 6, 10], "8000": 0, "2000": 0, "889504": 0, "157": 0, "351": 0, "492": 0, "163": [0, 1], "341": 0, "497": 0, "sst": [0, 10], "2971": 0, "1271": 0, "376132": 0, "261": 0, "452": 0, "288": 0, "207": 0, "481": 0, "312": 0, "wa": [0, 3, 5, 8, 10, 11], "2184": 0, "936": 0, "248563": 0, "305": 0, "414": 0, "281": 0, "282": 0, "446": 0, "272": 0, "wb": [0, 10], "4259": 0, "1823": 0, "404333": 0, "270": 0, "392": 0, "337": 0, "274": 0, "335": 0, "32": [0, 6], "repositori": [0, 10], "p\u00e9rez": [0, 3, 10, 11], "g\u00e1llego": [0, 3, 10, 11], "p": [0, 3, 8, 9, 10, 11], "quevedo": [0, 3, 10], "j": [0, 3, 10, 11], "r": [0, 3, 8, 10], "del": [0, 3, 10], "coz": [0, 3, 10], "2017": [0, 3, 10, 11], "ensembl": [0, 6, 10, 11], "problem": [0, 3, 5, 8, 10, 11], "characteriz": [0, 3, 10], "chang": [0, 1, 3, 10], "distribut": [0, 3, 5, 8, 10, 11], "case": [0, 1, 3, 4, 5, 8, 9, 10, 11], "studi": [0, 3, 10], "fusion": [0, 3, 10], "34": [0, 3, 10, 11], "87": [0, 3, 10], "doe": [0, 2, 3, 8, 11], "exactli": 0, "coincid": [0, 6], "et": [0, 2, 9, 10, 11], "al": [0, 2, 9, 10, 11], "sinc": [0, 1, 3, 5, 10, 11], "we": [0, 1, 3, 4, 5, 6, 10], "unabl": 0, "find": [0, 4, 11], "diabet": 0, "phonem": 0, "call": [0, 1, 5, 8, 10, 11], "fetch_ucidataset": [0, 3, 10], "yeast": [0, 10], "verbos": [0, 1, 4, 8, 9, 10, 11], "return": [0, 1, 3, 4, 5, 8, 9, 10, 11], "randomli": [0, 10], "drawn": [0, 1, 4, 8, 10], "stratifi": [0, 3, 9, 10, 11], "manner": [0, 9, 11], "whole": [0, 1, 3, 4, 8, 9], "collect": [0, 8, 9, 10], "70": 0, "30": [0, 1, 3, 11], "respect": [0, 1, 5, 8, 11], "option": [0, 1, 3, 5, 10, 11], "indic": [0, 1, 3, 4, 5, 8, 9, 10, 11], "descript": [0, 
10], "should": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "standard": [0, 1, 5, 8, 9, 10, 11], "paper": [0, 3, 9, 11], "submit": 0, "kfcv": [0, 9, 10, 11], "order": [0, 2, 3, 5, 8, 10, 11], "accommod": 0, "practic": [0, 4], "could": [0, 1, 3, 4, 5, 6], "first": [0, 1, 2, 3, 5, 8, 10, 11], "instanti": [0, 1, 3, 4, 8, 9, 11], "creat": [0, 6, 8, 11], "time": [0, 1, 3, 8, 10, 11], "fetch_ucilabelledcollect": [0, 10], "nfold": [0, 8, 10], "nrepeat": [0, 10], "abov": [0, 3, 5, 8], "conduct": [0, 8], "2x5fcv": 0, "all": [0, 1, 2, 3, 5, 8, 9, 11], "come": [0, 8, 10, 11], "numer": [0, 1, 3, 6, 10, 11], "form": [0, 8, 10, 11], "dens": [0, 11], "matric": [0, 5, 10], "acut": 0, "120": 0, "6": [0, 1, 3, 5, 10], "508": 0, "b": [0, 8, 10, 11], "583": 0, "417": 0, "balanc": [0, 4, 11], "625": 0, "539": 0, "461": 0, "922": 0, "078": 0, "breast": 0, "cancer": 0, "683": 0, "350": 0, "650": 0, "cmc": 0, "1473": 0, "573": 0, "427": 0, "774": 0, "226": 0, "653": 0, "347": 0, "ctg": 0, "2126": 0, "22": [0, 3, 9, 10], "222": [0, 9], "778": 0, "861": 0, "139": 0, "917": 0, "083": 0, "german": 0, "1000": [0, 4, 11], "24": [0, 9], "300": [0, 1, 9], "700": 0, "haberman": [0, 3], "306": 0, "735": 0, "265": 0, "ionospher": 0, "641": 0, "359": 0, "iri": 0, "150": 0, "667": 0, "333": 0, "mammograph": 0, "830": 0, "514": 0, "486": 0, "pageblock": 0, "5473": 0, "979": 0, "021": 0, "semeion": 0, "1593": 0, "256": [0, 9], "901": 0, "099": 0, "sonar": 0, "208": 0, "60": 0, "534": 0, "466": 0, "spambas": 0, "4601": 0, "57": 0, "606": 0, "394": 0, "spectf": 0, "267": 0, "44": 0, "794": 0, "206": 0, "tictacto": 0, "958": 0, "transfus": 0, "748": 0, "762": 0, "238": 0, "wdbc": 0, "569": 0, "627": 0, "373": 0, "wine": 0, "178": 0, "13": [0, 9], "669": 0, "331": 0, "601": 0, "399": 0, "730": 0, "q": [0, 2, 3, 8, 9, 11], "red": 0, "1599": 0, "465": 0, "535": 0, "white": 0, "4898": 0, "665": 0, "1484": 0, "8": [0, 1, 5, 10, 11], "711": 0, "289": 0, "download": [0, 2, 3, 8, 10], "automat": [0, 1], "thei": [0, 3, 11], "store": [0, 9, 10, 11], "quapy_data": [0, 8], "folder": [0, 10, 11], "faster": [0, 10], "reus": [0, 3, 8, 10], "howev": [0, 4, 5], "requir": [0, 1, 3, 6, 9], "special": [0, 5, 10], "action": 0, "moment": [0, 3], "fulli": [0, 8], "autom": [0, 3, 6], "cardiotocographi": 0, "excel": 0, "file": [0, 5, 8, 9, 10, 11], "user": [0, 1, 5], "instal": [0, 3, 6, 9, 11], "xlrd": [0, 2], "modul": [0, 1, 3, 5, 6, 7], "open": [0, 6, 10], "page": [0, 2, 6], "block": [0, 8], "need": [0, 3, 8, 10, 11], "unix": 0, "compress": 0, "extens": [0, 2, 5], "z": [0, 10], "directli": [0, 1, 3], "doabl": 0, "packag": [0, 2, 3, 6, 7], "like": [0, 1, 3, 5, 8, 9, 10, 11], "gzip": 0, "zip": [0, 5], "uncompress": 0, "o": [0, 8], "depend": [0, 1, 4, 5, 8, 11], "softwar": 0, "manual": 0, "do": [0, 1, 3, 4, 8, 9, 10, 11], "invok": [0, 1, 3, 8, 10], "provid": [0, 3, 5, 6, 10, 11], "loader": [0, 10], "simpl": [0, 3, 5, 11], "deal": 0, "t": [0, 1, 3, 8, 9, 11], "pre": [0, 3], "n": [0, 1, 8, 9, 11], "second": [0, 1, 3, 5, 8, 10], "represent": [0, 3, 8, 9, 11], "col": [0, 10], "int": [0, 5, 8, 10, 11], "float": [0, 3, 8, 9, 10, 11], "charg": [0, 10], "classmethod": [0, 8, 10, 11], "def": [0, 1, 3, 5, 8], "cl": 0, "path": [0, 3, 5, 8, 9, 10, 11], "str": [0, 8, 10, 11], "loader_func": [0, 10], "callabl": [0, 8, 10, 11], "defin": [0, 3, 8, 9, 10, 11], "argument": [0, 1, 3, 5, 8, 10, 11], "initi": [0, 9, 11], "particular": [0, 1, 3, 11], "receiv": [0, 3, 5], "addition": 0, "number": [0, 1, 3, 5, 8, 9, 10, 11], "specifi": [0, 1, 3, 5, 8, 9, 10], "otherwis": [0, 3, 8, 10], 
"infer": [0, 10], "least": [0, 10], "pass": [0, 1, 5, 8, 9, 11], "along": [0, 3, 8, 11], "train_path": [0, 10], "my_data": 0, "dat": [0, 9], "test_path": [0, 10], "my_custom_load": 0, "rb": 0, "fin": 0, "preprocess": [0, 1, 3, 8, 11], "includ": [0, 1, 3, 5, 6, 10, 11], "text2tfidf": [0, 1, 3, 10], "tfidf": [0, 4, 5, 10], "vector": [0, 8, 9, 10, 11], "reduce_column": [0, 10], "reduc": [0, 10], "column": [0, 10], "base": [0, 3, 6, 8, 9], "term": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "frequenc": [0, 10, 11], "transform": [0, 9, 10, 11], "valu": [0, 1, 3, 8, 9, 10, 11], "score": [0, 1, 4, 8, 9, 10], "subtract": [0, 8, 10], "normal": [0, 1, 3, 8, 10, 11], "deviat": [0, 1, 5, 8, 10], "so": [0, 1, 3, 5, 8, 9, 10, 11], "zero": [0, 8], "unit": [0, 8], "varianc": [0, 5], "textual": [0, 6, 10], "token": [0, 9, 10], "appeal": 1, "tool": [1, 6], "scenario": [1, 3, 4, 5, 6], "dataset": [1, 3, 4, 5, 6, 8, 9, 11], "shift": [1, 4, 6, 8, 9, 11], "particularli": 1, "prior": [1, 3, 4, 5, 6, 8, 11], "probabl": [1, 3, 4, 5, 6, 8, 9, 11], "That": [1, 4], "interest": [1, 5, 6, 8], "estim": [1, 3, 5, 6, 8, 9, 10, 11], "aris": 1, "under": 1, "belief": 1, "those": [1, 3, 4, 5, 8, 9, 11], "might": [1, 8, 10], "ones": [1, 3, 5, 8, 10, 11], "observ": [1, 11], "dure": [1, 5, 11], "other": [1, 3, 5, 6, 8, 10, 11], "word": [1, 3, 6, 9, 10, 11], "simpli": [1, 2, 3, 4, 5, 6, 8, 11], "predictor": 1, "assum": [1, 6, 11], "unlik": [1, 4, 8], "machin": [1, 4, 6, 9], "learn": [1, 2, 3, 4, 6, 8, 9, 10, 11], "govern": 1, "iid": [1, 5, 6], "assumpt": [1, 5, 6], "brief": [1, 10], "dedic": [1, 10], "explain": [1, 5], "here": [1, 11], "mae": [1, 4, 6, 8, 9, 11], "absolut": [1, 3, 5, 6, 8, 11], "mrae": [1, 6, 8, 9, 11], "rel": [1, 3, 8, 10, 11], "mse": [1, 3, 6, 8, 11], "squar": [1, 3, 8], "mkld": [1, 8, 11], "kullback": [1, 3, 8, 11], "leibler": [1, 3, 8, 11], "diverg": [1, 3, 8, 11], "mnkld": [1, 8, 11], "ae": [1, 2, 5, 8, 11], "rae": [1, 2, 8, 11], "se": [1, 8], "kld": [1, 2, 8, 9, 11], "nkld": [1, 2, 6, 8, 9, 11], "individu": [1, 3], "without": [1, 3, 8, 10], "averag": [1, 3, 8, 10, 11], "acc": [1, 3, 5, 6, 8, 11], "accuraci": [1, 5, 8, 11], "f1e": [1, 8], "f1": [1, 8, 9], "true_prev": [1, 5, 8], "prevs_hat": [1, 8], "ndarrai": [1, 3, 8, 10, 11], "contain": [1, 2, 3, 5, 8, 9, 10, 11], "smooth": [1, 8], "stabil": [1, 11], "third": [1, 5], "ep": [1, 8], "none": [1, 4, 8, 9, 10, 11], "paramet": [1, 3, 4, 8, 9, 10, 11], "epsilon": [1, 8, 11], "tradition": 1, "2t": [1, 8], "past": 1, "either": [1, 3, 8, 11], "environ": [1, 3, 4, 5, 8, 11], "variabl": [1, 3, 5, 8, 10], "onc": [1, 3, 5, 8, 10], "ommit": 1, "thereaft": 1, "recommend": [1, 5, 11], "np": [1, 3, 4, 5, 8, 10, 11], "asarrai": 1, "let": [1, 3, 11], "estim_prev": [1, 5, 8], "ae_": 1, "3f": [1, 6], "200": [1, 9], "600": 1, "914": 1, "final": [1, 3, 5, 11], "possibl": [1, 3, 8, 11], "string": [1, 8, 10, 11], "error_funct": 1, "from_nam": [1, 8], "accord": [1, 3, 4, 8, 9, 10, 11], "fix": [1, 4], "cover": [1, 4, 8, 9], "full": [1, 8], "contrast": 1, "natur": [1, 8], "despit": 1, "introduc": 1, "approxim": [1, 5, 8, 9], "preserv": [1, 5, 8], "procol": 1, "equal": [1, 8, 11], "distant": [1, 8], "interv": [1, 5, 8], "n_prevpoint": [1, 4, 5, 8], "determin": [1, 4, 5, 8], "constrain": [1, 5, 8, 10], "obtain": [1, 4, 8, 9, 11], "66": [1, 11], "given": [1, 3, 4, 8, 9, 10, 11], "num_prevalence_combin": [1, 8], "21": [1, 3, 5, 8], "n_class": [1, 3, 8, 9, 10, 11], "n_repeat": [1, 8], "1771": 1, "note": [1, 3, 4, 5, 8, 10], "last": [1, 3, 5, 8, 9, 10], "typic": [1, 4, 5, 8, 9, 10, 11], "singl": [1, 
3, 6, 11], "higher": [1, 5], "comput": [1, 3, 5, 8, 11], "perform": [1, 3, 4, 5, 6, 8, 9, 11], "signific": 1, "instead": [1, 3, 4, 8, 10, 11], "work": [1, 3, 4, 5, 8, 10, 11], "wai": [1, 11], "around": [1, 10], "maximum": [1, 8, 9, 11], "budg": 1, "close": [1, 10], "than": [1, 4, 5, 8, 9, 10], "budget": [1, 4], "achiev": [1, 3, 4, 5], "get_nprevpoints_approxim": [1, 8], "5000": [1, 5], "4960": 1, "cost": 1, "sometim": 1, "cumbersom": 1, "control": [1, 4, 8], "overal": 1, "experi": [1, 2, 3, 4, 5, 8], "rather": [1, 4], "By": [1, 3, 8], "avoid": [1, 8], "lead": [1, 10], "closer": 1, "surpass": 1, "script": [1, 2, 3, 6, 11], "pacc": [1, 3, 5, 8, 11], "reli": [1, 3, 8, 11], "logist": [1, 3, 9, 11], "regressor": [1, 3], "classifi": [1, 4, 5, 6, 8, 9, 11], "variou": [1, 5], "metric": [1, 3, 4, 6, 8, 11], "sklearn": [1, 3, 4, 5, 6, 9, 10, 11], "linear_model": [1, 3, 4, 6, 9], "logisticregress": [1, 3, 4, 6, 9, 11], "data": [1, 3, 4, 5, 6, 8, 9, 11], "min_df": [1, 3, 4, 5, 10, 11], "inplac": [1, 3, 10, 11], "lr": [1, 3, 9, 11], "aggreg": [1, 4, 5, 6, 8], "fit": [1, 3, 4, 5, 6, 8, 9, 10, 11], "df": 1, "artificial_sampling_report": 1, "mani": [1, 3, 4, 5, 6, 8, 11], "extract": [1, 8, 10], "categori": [1, 8], "n_repetit": [1, 4, 5], "n_job": [1, 3, 4, 8, 9, 10, 11], "parallel": [1, 3, 8, 9, 10, 11], "worker": [1, 8, 9, 10, 11], "cpu": [1, 9, 11], "random_se": [1, 8], "42": 1, "random": [1, 3, 4, 5, 8, 10], "seed": [1, 4, 8, 10], "replic": [1, 4, 8], "error_metr": [1, 4, 8], "line": [1, 3, 8], "result": [1, 2, 3, 4, 5, 6, 11], "report": 1, "panda": [1, 2], "datafram": 1, "displai": [1, 5, 8, 9], "just": [1, 3], "clearer": 1, "shown": [1, 5, 8], "convert": [1, 3, 8, 9, 10, 11], "repres": [1, 3, 5, 8, 10, 11], "decim": 1, "default": [1, 3, 8, 9, 10, 11], "pd": 1, "set_opt": 1, "expand_frame_repr": 1, "fals": [1, 3, 5, 8, 9, 10, 11], "map": [1, 9, 11], "000": 1, "000e": 1, "091": 1, "909": 1, "009": 1, "048": 1, "426e": 1, "04": 1, "837": 1, "037": 1, "114": 1, "633e": 1, "03": 1, "7": [1, 5, 8, 9, 11], "717": 1, "017": 1, "041": 1, "383e": 1, "366": 1, "634": 1, "034": 1, "070": 1, "412e": 1, "459": 1, "541": 1, "387e": 1, "565": 1, "435": 1, "035": 1, "073": 1, "535e": 1, "654": 1, "346": 1, "046": 1, "108": 1, "701e": 1, "725": 1, "275": 1, "075": 1, "235": 1, "515e": 1, "02": 1, "858": 1, "142": 1, "042": 1, "229": 1, "740e": 1, "945": 1, "055": 1, "27": [1, 3, 9], "357": 1, "219e": 1, "578": 1, "dtype": [1, 10], "float64": 1, "artificial_sampling_ev": [1, 4], "artificial_sampling_predict": [1, 5], "arrai": [1, 3, 5, 8, 9, 10, 11], "pip": 2, "older": 2, "version": [2, 8, 9, 11], "scikit": [2, 3, 4, 8, 9, 10, 11], "numpi": [2, 4, 8, 9], "scipi": [2, 10], "pytorch": [2, 11], "quanet": [2, 6, 9, 11], "svmperf": [2, 3, 8, 11], "patch": [2, 3, 9, 11], "joblib": [2, 11], "tqdm": 2, "matplotlib": [2, 8], "involv": [2, 5, 8], "you": [2, 3], "appli": [2, 3, 4, 5, 8, 9, 10, 11], "ext": 2, "compil": [2, 3], "sourc": [2, 3, 6, 9], "prepare_svmperf": [2, 3], "sh": [2, 3], "job": 2, "directori": [2, 8, 9, 10, 11], "svm_perf_quantif": [2, 3], "optim": [2, 3, 4, 8, 9, 11], "measur": [2, 3, 4, 5, 6, 8, 11], "propos": [2, 3, 11], "barranquero": [2, 3, 9, 11], "extend": [2, 3, 8, 11], "former": [2, 11], "categor": [3, 10], "belong": [3, 11], "non": [3, 11], "group": 3, "though": [3, 8], "plan": 3, "add": [3, 4, 8, 10], "more": [3, 5, 11], "futur": 3, "character": [3, 6], "fact": [3, 5], "product": [3, 10], "quantifi": [3, 4, 5, 6, 8, 10, 11], "shoud": 3, "basequantifi": [3, 8, 11], "abstract": [3, 8, 9, 10, 11], 
"abstractmethod": 3, "self": [3, 8, 9, 10, 11], "set_param": [3, 8, 9, 11], "get_param": [3, 8, 9, 11], "deep": [3, 8, 11], "familiar": 3, "structur": [3, 11], "inspir": 3, "reason": [3, 5, 6], "why": 3, "ha": [3, 4, 5, 8, 9, 10, 11], "adopt": [3, 4, 10], "respond": 3, "predict": [3, 4, 5, 8, 9, 11], "input": [3, 5, 8, 9, 11], "element": [3, 10, 11], "while": [3, 5, 9, 10, 11], "selector": 3, "process": [3, 4, 8], "hyperparamet": [3, 8, 11], "search": [3, 4, 6, 8, 11], "part": [3, 10], "aggregativequantifi": [3, 11], "must": [3, 10, 11], "fit_learn": 3, "classif_predict": [3, 11], "mention": 3, "befor": [3, 8, 9, 10, 11], "inde": [3, 4], "alreadi": [3, 8, 11], "preclassifi": 3, "maintain": [3, 11], "through": [3, 8], "properti": [3, 8, 9, 10, 11], "learner": [3, 4, 9, 11], "extern": 3, "probabilist": [3, 9, 11], "inherit": 3, "aggregativeprobabilisticquantifi": [3, 11], "posterior": [3, 8, 9, 11], "crisp": [3, 8, 11], "decis": [3, 8, 9, 11], "hard": [3, 9], "classif_posterior": [3, 11], "posterior_prob": [3, 11], "advantag": [3, 11], "procedur": [3, 6, 8], "veri": [3, 5], "effici": 3, "everi": [3, 8, 11], "leverag": 3, "speed": [3, 11], "up": [3, 4, 8, 9, 11], "over": [3, 4, 8], "customarili": [3, 4], "done": 3, "four": 3, "cc": [3, 5, 11], "simplest": 3, "deliv": [3, 11], "adjust": [3, 6, 8, 11], "pcc": [3, 4, 5, 11], "soft": 3, "serv": [3, 8, 10], "complet": [3, 5, 11], "equip": [3, 5], "svm": [3, 5, 6, 9, 10, 11], "linearsvc": [3, 5, 10], "pickl": [3, 8, 10, 11], "alia": [3, 8, 10, 11], "classifyandcount": [3, 11], "estim_preval": [3, 6, 11], "rate": [3, 8, 9, 11], "binari": [3, 5, 6, 8, 9, 10, 11], "init": 3, "addit": 3, "val_split": [3, 4, 9, 11], "integ": [3, 8, 9, 10, 11], "k": [3, 6, 8, 9, 10, 11], "fold": [3, 8, 10, 11], "cross": [3, 8, 9, 10, 11], "specif": [3, 4, 8], "held": [3, 4, 8, 9, 11], "out": [3, 4, 5, 8, 9, 10, 11], "postpon": 3, "constructor": 3, "prevail": 3, "overrid": 3, "illustr": [3, 4, 5], "seem": 3, "calibr": [3, 8], "calibratedclassifiercv": 3, "base_estim": 3, "cv": [3, 4], "predict_proba": [3, 9, 11], "As": [3, 4], "calibratedclassifi": 3, "except": [3, 8, 11], "rais": [3, 8, 11], "lastli": 3, "everyth": 3, "said": 3, "aboud": 3, "sld": [3, 11], "expectationmaximizationquantifi": [3, 11], "describ": [3, 8, 11], "saeren": [3, 11], "m": [3, 8, 11], "latinn": [3, 11], "decaesteck": [3, 11], "c": [3, 4, 8, 9, 10, 11], "2002": 3, "priori": 3, "14": 3, "41": 3, "attempt": [3, 11], "although": [3, 4, 5, 11], "improv": [3, 8, 9, 11], "rank": [3, 9], "almost": 3, "alwai": [3, 4, 5, 11], "among": 3, "effect": 3, "carri": [3, 10, 11], "gonz\u00e1lez": 3, "castro": 3, "v": [3, 8, 9, 11], "alaiz": 3, "rodr\u0131": 3, "guez": 3, "alegr": 3, "2013": 3, "scienc": 3, "218": 3, "146": 3, "It": [3, 4, 5, 8], "allia": 3, "hellingerdistancei": [3, 11], "mixtur": [3, 8, 11], "previou": 3, "overridden": [3, 11], "proport": [3, 4, 9, 10, 11], "taken": [3, 8, 9, 10], "itself": [3, 8, 11], "accept": 3, "elm": [3, 11], "famili": [3, 11], "target": [3, 5, 6, 8, 9, 11], "orient": [3, 6, 8, 11], "joachim": [3, 9, 11], "svmq": [3, 11], "d\u00edez": 3, "reliabl": 3, "pattern": 3, "recognit": 3, "48": 3, "591": 3, "604": 3, "svmkld": [3, 11], "multivari": [3, 9], "transact": 3, "discoveri": 3, "articl": [3, 4], "svmnkld": [3, 11], "svmae": [3, 11], "error": [3, 4, 6, 7, 9, 11], "svmrae": [3, 11], "what": 3, "nowadai": 3, "consid": [3, 5, 8, 9, 10, 11], "behav": [3, 5], "If": [3, 5, 8, 10, 11], "want": [3, 4], "custom": [3, 6, 10], "modifi": [3, 8], "assign": [3, 10], "Then": 3, "re": [3, 4, 
9, 10], "thing": 3, "your": 3, "svmperf_hom": 3, "valid_loss": [3, 9, 11], "mycustomloss": 3, "28": [3, 10], "current": [3, 8, 9, 10, 11], "support": [3, 6, 9, 10, 11], "oper": 3, "trivial": 3, "strategi": [3, 4], "2016": [3, 10, 11], "sentiment": [3, 6, 10], "19": [3, 10], "onevsal": [3, 11], "know": 3, "where": [3, 5, 8, 9, 10, 11], "top": [3, 8, 11], "thu": [3, 4, 5, 8, 9, 11], "nor": 3, "castano": [3, 10], "2019": [3, 10, 11], "dynam": [3, 9, 10, 11], "task": [3, 4, 10], "45": [3, 5, 10], "15": [3, 8, 10], "polici": [3, 11], "processor": 3, "av": [3, 11], "ptr": [3, 11], "member": [3, 11], "d": [3, 11], "static": [3, 11], "red_siz": [3, 11], "pleas": 3, "check": [3, 4, 8], "offer": [3, 6], "torch": [3, 9, 11], "embed": [3, 9, 11], "lstm": [3, 9, 11], "cnn": [3, 11], "its": [3, 4, 8, 9, 11], "layer": [3, 9, 11], "neuralclassifiertrain": [3, 9, 11], "cnnnet": [3, 9, 11], "vocabulary_s": [3, 9, 10, 11], "cuda": [3, 9, 11], "supervis": [4, 6], "strongli": [4, 5], "good": [4, 5], "choic": [4, 11], "hyper": [4, 8, 9], "wherebi": 4, "chosen": [4, 8], "pick": 4, "best": [4, 8, 9, 11], "being": [4, 8, 11], "criteria": 4, "solv": [4, 11], "assess": 4, "own": 4, "right": [4, 8, 10], "impos": [4, 8], "aim": [4, 5], "appropri": 4, "configur": [4, 8], "design": 4, "long": [4, 9], "regard": 4, "next": [4, 8, 9, 10], "section": 4, "argu": 4, "alejandro": 4, "fabrizio": 4, "count": [4, 5, 6, 8, 10, 11], "arxiv": 4, "preprint": 4, "2011": 4, "02552": 4, "2020": [4, 9], "varieti": 4, "exhibit": [4, 5], "degre": 4, "model_select": [4, 7, 11], "gridsearchq": [4, 8, 11], "grid": [4, 8, 11], "explor": [4, 8], "portion": 4, "param_grid": [4, 8, 11], "logspac": [4, 11], "class_weight": [4, 11], "eval_budget": 4, "refit": [4, 8], "retrain": [4, 9], "goe": 4, "end": [4, 8, 11], "best_params_": 4, "best_model_": 4, "101": 4, "5f": 4, "system": [4, 11], "start": 4, "hyperparam": 4, "0001": [4, 11], "got": [4, 11], "24987": 4, "48135": 4, "001": [4, 9, 11], "24866": 4, "100000": 4, "43676": 4, "finish": 4, "param": [4, 8, 9, 11], "19982": 4, "develop": [4, 6], "1010": 4, "5005": 4, "54it": 4, "20342": 4, "altern": 4, "computation": 4, "costli": 4, "try": 4, "theoret": 4, "suboptim": 4, "opt": 4, "gridsearchcv": [4, 11], "10000": 4, "5379": 4, "55it": 4, "41734": 4, "wors": [4, 5, 8], "larg": 4, "between": [4, 5, 6, 8, 9, 11], "modal": 4, "turn": 4, "better": 4, "nonetheless": 4, "happen": [4, 5], "basic": [5, 11], "help": [5, 11], "analys": [5, 6], "outcom": 5, "main": 5, "method_nam": [5, 8, 11], "name": [5, 8, 9, 10, 11], "shape": [5, 8, 9, 10, 11], "correspond": [5, 10], "matrix": [5, 8, 11], "appear": 5, "occur": [5, 10], "merg": 5, "emq": [5, 11], "55": 5, "showcas": 5, "wide": 5, "variant": [5, 6, 8, 11], "linear": [5, 8, 11], "review": [5, 6, 10], "step": [5, 8], "05": [5, 8, 11], "gen_data": 5, "base_classifi": 5, "yield": [5, 8, 10, 11], "tr_prev": [5, 8, 11], "append": 5, "__class__": 5, "__name__": 5, "insight": 5, "view": 5, "y": [5, 8, 9, 10, 11], "axi": [5, 8], "against": 5, "x": [5, 8, 9, 10, 11], "unfortun": 5, "limit": [5, 8, 11], "binary_diagon": [5, 8], "train_prev": [5, 8], "savepath": [5, 8], "bin_diag": 5, "png": 5, "save": [5, 8], "pdf": [5, 11], "cyan": 5, "dot": [5, 8], "color": [5, 8], "band": [5, 8], "hidden": [5, 9, 11], "show_std": [5, 8], "unadjust": 5, "bias": 5, "toward": [5, 10], "seen": [5, 8, 11], "evinc": 5, "box": [5, 8], "binary_bias_glob": [5, 8], "bin_bia": 5, "unbias": 5, "center": 5, "tend": 5, "overestim": 5, "high": [5, 8], "lower": [5, 11], "again": 5, "accordingli": 5, 
"20": [5, 8, 11], "90": [5, 8], "rewrit": 5, "method_data": 5, "training_preval": 5, "linspac": 5, "training_s": 5, "suffic": 5, "latex": 5, "syntax": 5, "_": [5, 8, 10], "now": 5, "clearli": 5, "binary_bias_bin": [5, 8], "broken": [5, 8], "down": [5, 8, 10], "bin": [5, 8, 11], "To": [5, 10], "nbin": [5, 8, 11], "isometr": [5, 8], "subinterv": 5, "interestingli": 5, "enough": 5, "seemingli": 5, "tendenc": 5, "low": [5, 8, 9], "underestim": 5, "beyond": 5, "67": [5, 8], "curios": 5, "pretti": 5, "discuss": 5, "analyz": 5, "compar": [5, 8], "both": [5, 10], "irrespect": [5, 11], "harder": 5, "interpret": [5, 6, 11], "error_by_drift": [5, 8], "error_nam": [5, 8], "n_bin": [5, 8, 11], "err_drift": 5, "whenev": [5, 8], "clear": 5, "lowest": 5, "difficult": 5, "rememb": 5, "solid": 5, "comparison": 5, "detriment": 5, "visual": [5, 6], "hide": 5, "framework": [6, 11], "written": 6, "root": 6, "concept": 6, "baselin": 6, "integr": 6, "commonli": 6, "facilit": 6, "twitter": [6, 10], "true_preval": 6, "hold": [6, 8, 11], "endeavour": [6, 8], "popular": 6, "expect": [6, 11], "maxim": [6, 11], "hdy": [6, 11], "versatil": 6, "etc": 6, "uci": [6, 10], "nativ": 6, "loss": [6, 9, 11], "perf": [6, 9, 11], "ad": 6, "meta": [6, 8], "plot": [6, 7], "diagon": [6, 8], "bia": [6, 8, 9, 11], "drift": 6, "api": 6, "subpackag": 7, "submodul": 7, "util": [7, 9, 10], "content": 7, "bctscalibr": 9, "nbvscalibr": 9, "recalibratedprobabilisticclassifi": 9, "recalibratedprobabilisticclassifierbas": 9, "classes_": [9, 10, 11], "fit_cv": 9, "fit_tr_val": 9, "tscalibr": 9, "vscalibr": 9, "lowranklogisticregress": 9, "document_embed": 9, "lstmnet": 9, "reset_net_param": 9, "textclassifiernet": 9, "dimens": [8, 9, 10, 11], "forward": [9, 11], "xavier_uniform": 9, "torchdataset": 9, "asdataload": 9, "decision_funct": 9, "splitstratifi": 10, "stat": 10, "train_test": 10, "xp": 10, "xy": 10, "split_random": 10, "split_stratifi": 10, "uniform_sampl": 10, "uniform_sampling_index": 10, "fetch_lequa2022": 10, "warn": 10, "indextransform": 10, "add_word": 10, "fit_transform": 10, "reader": 8, "binar": [8, 10], "from_csv": 10, "from_spars": 10, "from_text": 10, "reindex_label": 10, "getptecondestim": 11, "solve_adjust": 11, "adjustedclassifyandcount": 11, "distributionmatch": 11, "dy": 11, "em": 11, "max_it": 11, "explicitlossminimis": 11, "max": 11, "ms2": 11, "mediansweep": 11, "mediansweep2": 11, "probabilisticadjustedclassifyandcount": 11, "probabilisticclassifyandcount": 11, "smm": 11, "t50": 11, "thresholdoptim": 11, "cross_generate_predict": 11, "cross_generate_predictions_depr": 11, "binaryquantifi": 11, "onevsallgener": 11, "eacc": 11, "ecc": 11, "eemq": 11, "ehdi": 11, "epacc": 11, "valid_polici": 11, "ensemblefactori": 11, "get_probability_distribut": 11, "quanetmodul": 11, "quanettrain": 11, "clean_checkpoint": 11, "clean_checkpoint_dir": 11, "mae_loss": 11, "non_aggreg": 8, "maximumlikelihoodprevalenceestim": 11, "absolute_error": 8, "hat": 8, "frac": 8, "mathcal": 8, "sum_": 8, "acc_error": 8, "y_true": 8, "y_pred": 8, "tp": 8, "tn": 8, "fp": 8, "fn": 8, "stand": [8, 11], "f1_error": 8, "macro": 8, "f_1": 8, "harmon": 8, "recal": 8, "2tp": 8, "independ": [8, 11], "err_nam": 8, "p_hat": 8, "d_": 8, "kl": 8, "log": [8, 10], "factor": 8, "beforehand": 8, "n_sampl": [8, 9], "mean_absolute_error": 8, "mean_relative_absolute_error": 8, "relative_absolute_error": 8, "underlin": 8, "displaystyl": 8, "abstractprotocol": 8, "union": [8, 10, 11], "aggr_speedup": 8, "auto": 8, "evaluation_report": 8, "app": [8, 11], "repeat": 8, 
"smooth_limits_epsilon": 8, "random_st": [8, 10], "return_typ": 8, "sample_prev": 8, "abstractstochasticseededprotocol": 8, "onlabelledcollectionprotocol": 8, "95": 8, "copi": [8, 10], "quantiti": 8, "labelled_collect": 8, "prevalence_grid": 8, "exhaust": 8, "sum": [8, 11], "implicit": 8, "return_constrained_dim": 8, "rest": [8, 9, 10, 11], "quit": 8, "obvious": 8, "determinist": 8, "anywher": 8, "multipli": 8, "necessari": 8, "samples_paramet": 8, "total": 8, "parent": 8, "sequenc": 8, "enforc": 8, "collat": 8, "arg": [8, 10], "domainmix": 8, "domaina": 8, "domainb": 8, "mixture_point": 8, "domain": 8, "scale": [8, 9, 11], "npp": 8, "draw": 8, "uniformli": 8, "therefor": 8, "get_col": 8, "get_labelled_collect": 8, "on_preclassified_inst": 8, "pre_classif": 8, "in_plac": 8, "usimplexpp": 8, "kraemer": 8, "algorithm": [8, 11], "sens": 8, "guarante": [8, 10], "prefer": 8, "intract": 8, "hellingerdist": 8, "hellingh": 8, "distanc": [8, 11], "hd": [8, 11], "discret": [8, 11], "sqrt": 8, "p_i": 8, "q_i": 8, "real": [8, 9, 10, 11], "topsoedist": 8, "1e": [8, 9, 11], "topso": [8, 11], "adjusted_quantif": 8, "prevalence_estim": 8, "tpr": [8, 11], "fpr": [8, 11], "clip": 8, "exce": 8, "check_prevalence_vector": 8, "raise_except": 8, "toleranz": 8, "08": 8, "combinations_budget": 8, "largest": 8, "dimension": [8, 9, 10, 11], "repetit": 8, "less": [8, 10], "normalize_preval": 8, "l1": [8, 11], "calcul": 8, "binom": 8, "mass": 8, "alloc": [8, 9], "solut": 8, "star": 8, "bar": 8, "prevalence_from_label": 8, "n_instanc": [8, 9, 11], "correctli": 8, "even": 8, "len": 8, "prevalence_from_prob": 8, "bool": [8, 9, 11], "argmax": 8, "prevalence_linspac": 8, "01": [8, 9, 11], "separ": [8, 10], "99": 8, "uniform_prevalence_sampl": 8, "adapt": [8, 9], "post": 8, "http": [8, 10, 11], "stackexchang": 8, "com": 8, "question": 8, "3227": 8, "uniform": [8, 10], "uniform_simplex_sampl": 8, "dict": [8, 10, 11], "timeout": 8, "dictionari": [8, 9, 10, 11], "kei": [8, 10], "quantification_error": 8, "whether": [8, 9, 10, 11], "ignor": [8, 10, 11], "gen": 8, "establish": 8, "timer": 8, "longer": [8, 11], "timeouterror": 8, "bound": [8, 11], "stdout": 8, "best_model": 8, "after": [8, 11], "minim": [8, 11], "routin": [8, 10, 11], "unus": [8, 9], "contanin": 8, "cross_val_predict": 8, "akin": [8, 11], "issu": 8, "reproduc": [8, 10], "pos_class": [8, 10], "titl": 8, "colormap": 8, "listedcolormap": 8, "vertical_xtick": 8, "legend": 8, "local": 8, "sign": 8, "minu": 8, "classs": 8, "compon": [8, 9, 11], "cm": 8, "tab10": 8, "secondari": 8, "global": 8, "method_ord": 8, "henc": [8, 10], "conveni": 8, "multiclass": [8, 10, 11], "inconveni": 8, "leyend": 8, "hightlight": 8, "associ": 8, "brokenbar_supremacy_by_drift": 8, "isomer": 8, "x_error": 8, "y_error": 8, "ttest_alpha": 8, "005": 8, "tail_density_threshold": 8, "region": 8, "chart": 8, "condit": [8, 11], "ii": 8, "significantli": 8, "side": 8, "confid": 8, "percentil": 8, "divid": 8, "amount": 8, "similar": [8, 11], "threshold": [8, 11], "densiti": 8, "tail": 8, "discard": 8, "outlier": 8, "show_dens": 8, "show_legend": 8, "logscal": 8, "vline": 8, "especi": 8, "mai": 8, "cumberson": 8, "gain": 8, "understand": 8, "fare": 8, "regim": 8, "highlight": 8, "vertic": 8, "earlystop": 8, "patienc": [8, 9, 11], "lower_is_bett": 8, "earli": [8, 9, 11], "stop": [8, 9, 11], "epoch": [8, 9, 11], "best_epoch": 8, "best_scor": 8, "consecut": [8, 9, 11], "monitor": 8, "obtaind": 8, "far": [8, 9, 10], "flag": 8, "keep": 8, "track": 8, "boolean": [8, 10, 11], "create_if_not_exist": 8, 
"makedir": 8, "exist_ok": 8, "join": 8, "dir": [8, 11], "subdir": 8, "anotherdir": 8, "create_parent_dir": 8, "exist": 8, "txt": 8, "download_fil": 8, "url": 8, "archive_filenam": 8, "destin": 8, "filenam": 8, "download_file_if_not_exist": 8, "dowload": 8, "get_quapy_hom": 8, "home": [8, 10], "perman": 8, "map_parallel": 8, "func": 8, "slice": 8, "item": 8, "wrapper": [8, 9, 10, 11], "multiprocess": [8, 11], "delai": 8, "args_i": 8, "silent": [8, 11], "child": 8, "ensur": 8, "pickled_resourc": 8, "pickle_path": 8, "generation_func": 8, "fast": [8, 10], "resourc": 8, "some_arrai": 8, "mock": [8, 9], "rand": 8, "my_arrai": 8, "pkl": 8, "save_text_fil": 8, "disk": 8, "miss": 8, "temp_se": 8, "context": 8, "tempor": 8, "outer": 8, "state": 8, "within": [8, 11], "get_njob": [], "correct": [9, 11], "temperatur": [9, 11], "bct": [9, 11], "abstent": 9, "alexandari": [9, 11], "afterward": [9, 11], "No": [9, 11], "nbv": [9, 11], "baseestim": [9, 11], "calibratorfactori": 9, "n_compon": 9, "kwarg": [9, 10, 11], "decomposit": 9, "truncatedsvd": 9, "princip": 9, "regress": 9, "n_featur": 9, "length": [9, 10], "eventu": [9, 10], "unalt": 9, "emb": 9, "embedding_s": 9, "hidden_s": 9, "repr_siz": 9, "kernel_height": 9, "stride": 9, "pad": [9, 10], "drop_p": 9, "convolut": 9, "vocabulari": [9, 10], "kernel": 9, "drop": 9, "dropout": [9, 11], "batch": 9, "dataload": 9, "tensor": 9, "n_dimens": 9, "lstm_class_nlay": 9, "short": 9, "memori": 9, "net": 9, "weight_decai": 9, "batch_siz": 9, "64": [9, 11], "batch_size_test": 9, "512": [9, 11], "padding_length": 9, "checkpointpath": 9, "checkpoint": [9, 11], "classifier_net": 9, "weight": [9, 10], "decai": 9, "wait": 9, "enabl": 9, "gpu": [9, 11], "vocab_s": 9, "reiniti": 9, "trainer": 9, "disjoint": 9, "embed_s": 9, "nn": 9, "pad_length": 9, "xavier": 9, "shuffl": [9, 10], "longest": 9, "shorter": 9, "svmperf_bas": [9, 11], "classifiermixin": 9, "thorsten": 9, "refer": [9, 10], "svm_perf_learn": 9, "svm_perf_classifi": 9, "trade": 9, "off": 9, "margin": 9, "std": 9, "qacc": 9, "qf1": 9, "qgm": 9, "12": 9, "26": 9, "23": 9, "train_siz": 10, "conform": 10, "round": 10, "loader_kwarg": 10, "read": 10, "tupl": [10, 11], "tr": 10, "te": 10, "csr": 10, "csr_matrix": 10, "4403": 10, "my_collect": 10, "codefram": 10, "larger": [10, 11], "actual": [10, 11], "empti": 10, "met": 10, "whose": [10, 11], "train_prop": 10, "left": [8, 10], "stratif": 10, "greater": 10, "dataset_nam": 10, "data_hom": 10, "test_split": 10, "predefin": 10, "uci_dataset": 10, "dump": 10, "leav": 10, "quay_data": 10, "ml": 10, "5fcvx2": 10, "x2": 10, "offici": 10, "lequa": 10, "competit": 10, "t1a": 10, "t1b": 10, "t2a": 10, "t2b": 10, "raw": 10, "merchandis": 10, "sperduti": 10, "2022": 10, "overview": 10, "clef": 10, "lequa2022_experi": 10, "py": 10, "guid": 10, "val_gen": 10, "test_gen": 10, "samplesfromdir": 10, "minimun": 10, "kept": 10, "subsequ": 10, "mining6": 10, "devel": 10, "style": 10, "countvector": 10, "keyword": [10, 11], "nogap": 10, "regardless": 10, "codifi": 10, "unknown": 10, "surfac": 10, "assert": 10, "gap": 10, "preced": 10, "decid": 10, "uniqu": 10, "rare": 10, "unk": 10, "minimum": [10, 11], "occurr": 10, "org": [10, 11], "stabl": 10, "feature_extract": 10, "html": 10, "subtyp": 10, "spmatrix": 10, "remov": [10, 11], "infrequ": 10, "aka": [10, 11], "sublinear_tf": 10, "scall": 10, "counter": 10, "tfidfvector": 10, "whcih": 10, "had": 10, "encod": 10, "utf": 10, "csv": 10, "feat1": 10, "feat2": 10, "featn": 10, "covari": 10, "express": 10, "row": 10, "class2int": 10, 
"collet": 10, "fomart": 10, "progress": 10, "sentenc": 10, "classnam": 10, "u1": 10, "misclassif": 11, "n_classes_": [], "fit_classifi": 11, "bypass": 11, "y_": 11, "ptecondestim": 11, "prevs_estim": 11, "ax": 11, "entri": 11, "y_i": 11, "y_j": 11, "_posterior_probabilities_": 11, "attribut": 11, "subclass": 11, "give": 11, "outsid": 11, "unless": 11, "noth": 11, "els": 11, "cdf": 11, "match": 11, "helling": 11, "sought": 11, "channel": 11, "proper": 11, "ch": 11, "di": 11, "dij": 11, "fraction": 11, "th": 11, "tol": 11, "ternari": 11, "dl": 11, "doi": 11, "1145": 11, "3219819": 11, "3220059": 11, "histogram": 11, "toler": 11, "explicit": 11, "exact_train_prev": 11, "recalib": 11, "updat": 11, "likelihood": [9, 11], "mutual": 11, "recurs": 11, "until": 11, "converg": 11, "suggest": 11, "recalibr": 11, "reach": 11, "loop": 11, "cumul": 11, "unlabel": 11, "latter": 11, "forman": 11, "2006": 11, "2008": 11, "goal": 11, "bring": 11, "denomin": 11, "median": 11, "sweep": 11, "binary_quantifi": 11, "prevel": 11, "emploi": 11, "resp": 11, "subobject": 11, "nest": 11, "pipelin": 11, "__": 11, "simplif": 11, "2021": 11, "equival": 11, "cosest": 11, "heurist": 11, "choos": 11, "ground": 11, "complement": 11, "param_mod_sel": 11, "param_model_sel": 11, "min_po": 11, "max_sample_s": 11, "closest": 11, "preliminari": 11, "recomput": 11, "compat": 11, "l": 11, "base_quantifier_class": 11, "factori": 11, "common": 11, "doc_embedding_s": 11, "stats_siz": 11, "lstm_hidden_s": 11, "lstm_nlayer": 11, "ff_layer": 11, "1024": 11, "bidirect": 11, "qdrop_p": 11, "order_bi": 11, "cell": 11, "connect": 11, "ff": 11, "sort": 11, "doc_embed": 11, "doc_posterior": 11, "recip": 11, "care": 11, "regist": 11, "hook": 11, "n_epoch": 11, "tr_iter_per_poch": 11, "va_iter_per_poch": 11, "checkpointdir": 11, "checkpointnam": 11, "phase": 11, "anyth": 11, "truth": 11, "mlpe": 11, "lazi": 11, "put": 11, "assumpion": 11, "beat": [9, 11], "estimant": 11, "kundaj": 9, "shrikumar": 9, "novemb": 9, "232": 9, "pmlr": 9, "outpu": [], "partit": 9, "ight": [], "valueerror": 8, "attach": 10, "mix": 10, "onevsallaggreg": 11, "parallel_backend": 11, "loki": 11, "backend": 11, "cannot": 11, "temp": 11, "getonevsal": 11}, "objects": {"": [[8, 0, 0, "-", "quapy"]], "quapy": [[9, 0, 0, "-", "classification"], [10, 0, 0, "-", "data"], [8, 0, 0, "-", "error"], [8, 0, 0, "-", "evaluation"], [8, 0, 0, "-", "functional"], [11, 0, 0, "-", "method"], [8, 0, 0, "-", "model_selection"], [8, 0, 0, "-", "plot"], [8, 0, 0, "-", "protocol"], [8, 0, 0, "-", "util"]], "quapy.classification": [[9, 0, 0, "-", "calibration"], [9, 0, 0, "-", "methods"], [9, 0, 0, "-", "neural"], [9, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[9, 1, 1, "", "BCTSCalibration"], [9, 1, 1, "", "NBVSCalibration"], [9, 1, 1, "", "RecalibratedProbabilisticClassifier"], [9, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [9, 1, 1, "", "TSCalibration"], [9, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[9, 2, 1, "", "classes_"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "fit_cv"], [9, 3, 1, "", "fit_tr_val"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[9, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural": [[9, 1, 1, "", 
"CNNnet"], [9, 1, 1, "", "LSTMnet"], [9, 1, 1, "", "NeuralClassifierTrainer"], [9, 1, 1, "", "TextClassifierNet"], [9, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[9, 2, 1, "", "device"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "reset_net_params"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[9, 3, 1, "", "dimensions"], [9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "forward"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict_proba"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"], [9, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[9, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[9, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[9, 3, 1, "", "decision_function"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "set_params"], [9, 4, 1, "", "valid_losses"]], "quapy.data": [[10, 0, 0, "-", "base"], [10, 0, 0, "-", "datasets"], [10, 0, 0, "-", "preprocessing"], [10, 0, 0, "-", "reader"]], "quapy.data.base": [[10, 1, 1, "", "Dataset"], [10, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[10, 3, 1, "", "SplitStratified"], [10, 2, 1, "", "binary"], [10, 2, 1, "", "classes_"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 3, 1, "", "stats"], [10, 2, 1, "", "train_test"], [10, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[10, 2, 1, "", "X"], [10, 2, 1, "", "Xp"], [10, 2, 1, "", "Xy"], [10, 2, 1, "", "binary"], [10, 3, 1, "", "counts"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 3, 1, "", "mix"], [10, 2, 1, "", "n_classes"], [10, 2, 1, "", "p"], [10, 3, 1, "", "prevalence"], [10, 3, 1, "", "sampling"], [10, 3, 1, "", "sampling_from_index"], [10, 3, 1, "", "sampling_index"], [10, 3, 1, "", "split_random"], [10, 3, 1, "", "split_stratified"], [10, 3, 1, "", "stats"], [10, 3, 1, "", "uniform_sampling"], [10, 3, 1, "", "uniform_sampling_index"], [10, 2, 1, "", "y"]], "quapy.data.datasets": [[10, 5, 1, "", "fetch_UCIDataset"], [10, 5, 1, "", "fetch_UCILabelledCollection"], [10, 5, 1, "", "fetch_lequa2022"], [10, 5, 1, "", "fetch_reviews"], [10, 5, 1, "", "fetch_twitter"], [10, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[10, 1, 1, "", "IndexTransformer"], [10, 5, 1, "", "index"], [10, 5, 1, "", "reduce_columns"], [10, 5, 1, "", "standardize"], [10, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[10, 3, 1, "", "add_word"], [10, 3, 1, "", "fit"], [10, 3, 1, "", "fit_transform"], [10, 3, 1, "", "transform"], [10, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[10, 5, 1, "", "binarize"], [10, 5, 1, "", "from_csv"], [10, 5, 1, "", "from_sparse"], [10, 5, 1, "", "from_text"], [10, 5, 1, "", "reindex_labels"]], "quapy.error": [[8, 5, 1, "", "absolute_error"], [8, 5, 1, "", "acc_error"], [8, 5, 1, "", "acce"], [8, 5, 1, "", "ae"], [8, 5, 1, "", "f1_error"], [8, 5, 1, "", "f1e"], [8, 5, 1, "", "from_name"], [8, 5, 1, "", "kld"], [8, 5, 1, "", "mae"], [8, 5, 1, "", 
"mean_absolute_error"], [8, 5, 1, "", "mean_relative_absolute_error"], [8, 5, 1, "", "mkld"], [8, 5, 1, "", "mnkld"], [8, 5, 1, "", "mrae"], [8, 5, 1, "", "mse"], [8, 5, 1, "", "nkld"], [8, 5, 1, "", "rae"], [8, 5, 1, "", "relative_absolute_error"], [8, 5, 1, "", "se"], [8, 5, 1, "", "smooth"]], "quapy.evaluation": [[8, 5, 1, "", "evaluate"], [8, 5, 1, "", "evaluation_report"], [8, 5, 1, "", "prediction"]], "quapy.functional": [[8, 5, 1, "", "HellingerDistance"], [8, 5, 1, "", "TopsoeDistance"], [8, 5, 1, "", "adjusted_quantification"], [8, 5, 1, "", "check_prevalence_vector"], [8, 5, 1, "", "get_nprevpoints_approximation"], [8, 5, 1, "", "normalize_prevalence"], [8, 5, 1, "", "num_prevalence_combinations"], [8, 5, 1, "", "prevalence_from_labels"], [8, 5, 1, "", "prevalence_from_probabilities"], [8, 5, 1, "", "prevalence_linspace"], [8, 5, 1, "", "strprev"], [8, 5, 1, "", "uniform_prevalence_sampling"], [8, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[11, 0, 0, "-", "aggregative"], [11, 0, 0, "-", "base"], [11, 0, 0, "-", "meta"], [11, 0, 0, "-", "neural"], [11, 0, 0, "-", "non_aggregative"]], "quapy.method.aggregative": [[11, 1, 1, "", "ACC"], [11, 4, 1, "", "AdjustedClassifyAndCount"], [11, 1, 1, "", "AggregativeProbabilisticQuantifier"], [11, 1, 1, "", "AggregativeQuantifier"], [11, 1, 1, "", "CC"], [11, 4, 1, "", "ClassifyAndCount"], [11, 1, 1, "", "DistributionMatching"], [11, 1, 1, "", "DyS"], [11, 1, 1, "", "ELM"], [11, 1, 1, "", "EMQ"], [11, 4, 1, "", "ExpectationMaximizationQuantifier"], [11, 4, 1, "", "ExplicitLossMinimisation"], [11, 1, 1, "", "HDy"], [11, 4, 1, "", "HellingerDistanceY"], [11, 1, 1, "", "MAX"], [11, 1, 1, "", "MS"], [11, 1, 1, "", "MS2"], [11, 4, 1, "", "MedianSweep"], [11, 4, 1, "", "MedianSweep2"], [11, 1, 1, "", "OneVsAllAggregative"], [11, 1, 1, "", "PACC"], [11, 1, 1, "", "PCC"], [11, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [11, 4, 1, "", "ProbabilisticClassifyAndCount"], [11, 4, 1, "", "SLD"], [11, 1, 1, "", "SMM"], [11, 1, 1, "", "SVMAE"], [11, 1, 1, "", "SVMKLD"], [11, 1, 1, "", "SVMNKLD"], [11, 1, 1, "", "SVMQ"], [11, 1, 1, "", "SVMRAE"], [11, 1, 1, "", "T50"], [11, 1, 1, "", "ThresholdOptimization"], [11, 1, 1, "", "X"], [11, 5, 1, "", "cross_generate_predictions"], [11, 5, 1, "", "cross_generate_predictions_depr"]], "quapy.method.aggregative.ACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"], [11, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeProbabilisticQuantifier": [[11, 3, 1, "", "classify"]], "quapy.method.aggregative.AggregativeQuantifier": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 2, 1, "", "classifier"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.aggregative.CC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DistributionMatching": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DyS": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ELM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.EMQ": [[11, 3, 1, "", "EM"], [11, 4, 1, "", "EPSILON"], [11, 4, 1, "", "MAX_ITER"], [11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[11, 3, 1, "", 
"aggregate"], [11, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.SMM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ThresholdOptimization": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.base": [[11, 1, 1, "", "BaseQuantifier"], [11, 1, 1, "", "BinaryQuantifier"], [11, 1, 1, "", "OneVsAll"], [11, 1, 1, "", "OneVsAllGeneric"], [11, 5, 1, "", "getOneVsAll"]], "quapy.method.base.BaseQuantifier": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.meta": [[11, 5, 1, "", "EACC"], [11, 5, 1, "", "ECC"], [11, 5, 1, "", "EEMQ"], [11, 5, 1, "", "EHDy"], [11, 5, 1, "", "EPACC"], [11, 1, 1, "", "Ensemble"], [11, 5, 1, "", "ensembleFactory"], [11, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[11, 4, 1, "", "VALID_POLICIES"], [11, 2, 1, "", "aggregative"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 2, 1, "", "probabilistic"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.neural": [[11, 1, 1, "", "QuaNetModule"], [11, 1, 1, "", "QuaNetTrainer"], [11, 5, 1, "", "mae_loss"]], "quapy.method.neural.QuaNetModule": [[11, 2, 1, "", "device"], [11, 3, 1, "", "forward"], [11, 4, 1, "", "training"]], "quapy.method.neural.QuaNetTrainer": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "clean_checkpoint"], [11, 3, 1, "", "clean_checkpoint_dir"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[11, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.model_selection": [[8, 1, 1, "", "GridSearchQ"], [8, 5, 1, "", "cross_val_predict"]], "quapy.model_selection.GridSearchQ": [[8, 3, 1, "", "best_model"], [8, 3, 1, "", "fit"], [8, 3, 1, "", "get_params"], [8, 3, 1, "", "quantify"], [8, 3, 1, "", "set_params"]], "quapy.plot": [[8, 5, 1, "", "binary_bias_bins"], [8, 5, 1, "", "binary_bias_global"], [8, 5, 1, "", "binary_diagonal"], [8, 5, 1, "", "brokenbar_supremacy_by_drift"], [8, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[8, 1, 1, "", "APP"], [8, 1, 1, "", "AbstractProtocol"], [8, 1, 1, "", "AbstractStochasticSeededProtocol"], [8, 1, 1, "", "DomainMixer"], [8, 1, 1, "", "NPP"], [8, 1, 1, "", "OnLabelledCollectionProtocol"], [8, 1, 1, "", "USimplexPP"]], "quapy.protocol.APP": [[8, 3, 1, "", "prevalence_grid"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[8, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[8, 3, 1, "", "collator"], [8, 2, 1, "", "random_state"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.NPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[8, 4, 1, "", "RETURN_TYPES"], [8, 3, 1, "", "get_collator"], [8, 3, 1, "", "get_labelled_collection"], [8, 3, 
1, "", "on_preclassified_instances"]], "quapy.protocol.USimplexPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.util": [[8, 1, 1, "", "EarlyStop"], [8, 5, 1, "", "create_if_not_exist"], [8, 5, 1, "", "create_parent_dir"], [8, 5, 1, "", "download_file"], [8, 5, 1, "", "download_file_if_not_exists"], [8, 5, 1, "", "get_quapy_home"], [8, 5, 1, "", "map_parallel"], [8, 5, 1, "", "parallel"], [8, 5, 1, "", "pickled_resource"], [8, 5, 1, "", "save_text_file"], [8, 5, 1, "", "temp_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"dataset": [0, 10], "review": 0, "twitter": 0, "sentiment": 0, "uci": 0, "machin": 0, "learn": 0, "issu": 0, "ad": 0, "custom": 0, "data": [0, 10], "process": 0, "evalu": [1, 8], "error": [1, 5, 8], "measur": 1, "protocol": [1, 8], "instal": 2, "requir": 2, "svm": 2, "perf": 2, "quantif": [2, 3, 4, 5], "orient": [2, 4], "loss": [2, 3, 4], "method": [3, 9, 11], "aggreg": [3, 11], "The": 3, "classifi": 3, "count": 3, "variant": 3, "expect": 3, "maxim": 3, "emq": 3, "helling": 3, "distanc": 3, "y": 3, "hdy": 3, "explicit": 3, "minim": 3, "meta": [3, 11], "model": [3, 4], "ensembl": 3, "quanet": 3, "neural": [3, 9, 11], "network": 3, "select": 4, "target": 4, "classif": [4, 9], "plot": [5, 8], "diagon": 5, "bia": 5, "drift": 5, "welcom": 6, "quapi": [6, 7, 8, 9, 10, 11], "": 6, "document": 6, "introduct": 6, "A": 6, "quick": 6, "exampl": 6, "featur": 6, "content": [6, 8, 9, 10, 11], "indic": 6, "tabl": 6, "packag": [8, 9, 10, 11], "subpackag": 8, "submodul": [8, 9, 10, 11], "function": 8, "model_select": 8, "util": 8, "modul": [8, 9, 10, 11], "calibr": 9, "svmperf": 9, "base": [10, 11], "preprocess": 10, "reader": 10, "non_aggreg": 11}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Datasets": [[0, "datasets"]], "Reviews Datasets": [[0, "reviews-datasets"]], "Twitter Sentiment Datasets": [[0, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[0, "uci-machine-learning"]], "Issues:": [[0, "issues"]], "Adding Custom Datasets": [[0, "adding-custom-datasets"]], "Data Processing": [[0, "data-processing"]], "Evaluation": [[1, "evaluation"]], "Error Measures": [[1, "error-measures"]], "Evaluation Protocols": [[1, "evaluation-protocols"]], "Installation": [[2, "installation"]], "Requirements": [[2, "requirements"]], "SVM-perf with quantification-oriented losses": [[2, "svm-perf-with-quantification-oriented-losses"]], "Quantification Methods": [[3, "quantification-methods"]], "Aggregative Methods": [[3, "aggregative-methods"]], "The Classify & Count variants": [[3, "the-classify-count-variants"]], "Expectation Maximization (EMQ)": [[3, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[3, "hellinger-distance-y-hdy"]], "Explicit Loss Minimization": [[3, "explicit-loss-minimization"]], "Meta Models": [[3, "meta-models"]], "Ensembles": [[3, "ensembles"]], "The QuaNet neural network": [[3, 
"the-quanet-neural-network"]], "Model Selection": [[4, "model-selection"]], "Targeting a Quantification-oriented loss": [[4, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[4, "targeting-a-classification-oriented-loss"]], "Plotting": [[5, "plotting"]], "Diagonal Plot": [[5, "diagonal-plot"]], "Quantification bias": [[5, "quantification-bias"]], "Error by Drift": [[5, "error-by-drift"]], "Welcome to QuaPy\u2019s documentation!": [[6, "welcome-to-quapy-s-documentation"]], "Introduction": [[6, "introduction"]], "A quick example:": [[6, "a-quick-example"]], "Features": [[6, "features"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "quapy": [[7, "quapy"]], "quapy package": [[8, "quapy-package"]], "Submodules": [[8, "submodules"], [9, "submodules"], [10, "submodules"], [11, "submodules"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.protocol": [[8, "quapy-protocol"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.util": [[8, "module-quapy.util"]], "Subpackages": [[8, "subpackages"]], "Module contents": [[8, "module-quapy"], [9, "module-quapy.classification"], [10, "module-quapy.data"], [11, "module-quapy.method"]], "quapy.classification package": [[9, "quapy-classification-package"]], "quapy.classification.calibration": [[9, "quapy-classification-calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "quapy.data package": [[10, "quapy-data-package"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "quapy.method package": [[11, "quapy-method-package"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[8, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol"]], "domainmixer (class in quapy.protocol)": [[8, "quapy.protocol.DomainMixer"]], "earlystop (class in quapy.util)": [[8, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[8, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[8, "quapy.functional.HellingerDistance"]], "npp (class in quapy.protocol)": [[8, "quapy.protocol.NPP"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[8, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "topsoedistance() (in module quapy.functional)": [[8, "quapy.functional.TopsoeDistance"]], "usimplexpp (class in quapy.protocol)": [[8, "quapy.protocol.USimplexPP"]], 
"absolute_error() (in module quapy.error)": [[8, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[8, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[8, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[8, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[8, "quapy.error.ae"]], "best_model() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[8, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[8, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[8, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [9, "module-quapy.classification"], [9, 
"module-quapy.classification.calibration"], [9, "module-quapy.classification.methods"], [9, "module-quapy.classification.neural"], [9, "module-quapy.classification.svmperf"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() 
(quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "bctscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.BCTSCalibration"]], "cnnnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.CNNnet"]], "lstmnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.LSTMnet"]], "lowranklogisticregression (class in quapy.classification.methods)": [[9, "quapy.classification.methods.LowRankLogisticRegression"]], "nbvscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.NBVSCalibration"]], "neuralclassifiertrainer (class in quapy.classification.neural)": [[9, "quapy.classification.neural.NeuralClassifierTrainer"]], "recalibratedprobabilisticclassifier (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifier"]], "recalibratedprobabilisticclassifierbase (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase"]], "svmperf (class in quapy.classification.svmperf)": [[9, "quapy.classification.svmperf.SVMperf"]], "tscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.TSCalibration"]], "textclassifiernet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TextClassifierNet"]], "torchdataset (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TorchDataset"]], "vscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.VSCalibration"]], "asdataloader() (quapy.classification.neural.torchdataset method)": [[9, "quapy.classification.neural.TorchDataset.asDataloader"]], "classes_ (quapy.classification.calibration.recalibratedprobabilisticclassifierbase property)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.classes_"]], "decision_function() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.decision_function"]], "device (quapy.classification.neural.neuralclassifiertrainer property)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.device"]], "dimensions() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.dimensions"]], "document_embedding() 
(quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.document_embedding"]], "document_embedding() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.document_embedding"]], "document_embedding() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.document_embedding"]], "fit() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit"]], "fit() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.fit"]], "fit() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.fit"]], "fit() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.fit"]], "fit_cv() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv"]], "fit_tr_val() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val"]], "forward() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.forward"]], "get_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.get_params"]], "get_params() (quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.get_params"]], "get_params() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.get_params"]], "get_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.get_params"]], "get_params() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.get_params"]], "predict() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict"]], "predict() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict"]], "predict() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict"]], "predict() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.predict"]], "predict_proba() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba"]], "predict_proba() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict_proba"]], "predict_proba() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict_proba"]], "predict_proba() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.predict_proba"]], "quapy.classification": [[9, "module-quapy.classification"]], "quapy.classification.calibration": [[9, "module-quapy.classification.calibration"]], 
"quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "reset_net_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.reset_net_params"]], "set_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.set_params"]], "set_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.set_params"]], "set_params() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.set_params"]], "training (quapy.classification.neural.cnnnet attribute)": [[9, "quapy.classification.neural.CNNnet.training"]], "training (quapy.classification.neural.lstmnet attribute)": [[9, "quapy.classification.neural.LSTMnet.training"]], "training (quapy.classification.neural.textclassifiernet attribute)": [[9, "quapy.classification.neural.TextClassifierNet.training"]], "transform() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.transform"]], "transform() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.transform"]], "valid_losses (quapy.classification.svmperf.svmperf attribute)": [[9, "quapy.classification.svmperf.SVMperf.valid_losses"]], "vocabulary_size (quapy.classification.neural.cnnnet property)": [[9, "quapy.classification.neural.CNNnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.lstmnet property)": [[9, "quapy.classification.neural.LSTMnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.textclassifiernet property)": [[9, "quapy.classification.neural.TextClassifierNet.vocabulary_size"]], "xavier_uniform() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.xavier_uniform"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], 
"fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "mix() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.mix"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() 
(quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "elm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ELM"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "explicitlossminimisation (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExplicitLossMinimisation"]], "hdy (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in 
quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "svmae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMAE"]], "svmkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMKLD"]], "svmnkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMNKLD"]], "svmq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMQ"]], "svmrae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMRAE"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[11, 
"quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() (quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, 
"quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getonevsall() (in module quapy.method.base)": [[11, "quapy.method.base.getOneVsAll"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file diff --git a/examples/custom_quantifier.py b/examples/custom_quantifier.py index a025b87..705c371 100644 --- a/examples/custom_quantifier.py +++ b/examples/custom_quantifier.py @@ -11,7 +11,7 @@ from sklearn.linear_model import LogisticRegression # Define a custom quantifier: for this example, we will consider a new quantification algorithm that uses a # logistic regressor for 
generating posterior probabilities, and then applies a custom threshold value to the # posteriors. Since the quantifier internally uses a classifier, it is an aggregative quantifier; and since it -# relies on posterior probabilities, then it is a probabilistic aggregative quantifier. Note also it has an +# relies on posterior probabilities, it is a probabilistic-aggregative quantifier. Note also that it has an # internal hyperparameter (let's say, alpha) which is the decision threshold. Let's also assume the quantifier # is binary, for simplicity. @@ -47,13 +47,13 @@ if __name__ == '__main__': # load the IMDb dataset train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test - train, val = train.split_stratified(train_prop=0.75) # model selection # let us assume we want to explore our hyperparameter alpha along with one hyperparameter of the classifier + train, val = train.split_stratified(train_prop=0.75) param_grid = { - 'alpha': np.linspace(0,1,11), # quantifier-dependent hyperparameter - 'classifier__C': np.logspace(-2,2,5) # classifier-dependent hyperparameter + 'alpha': np.linspace(0, 1, 11), # quantifier-dependent hyperparameter + 'classifier__C': np.logspace(-2, 2, 5) # classifier-dependent hyperparameter } quantifier = GridSearchQ(quantifier, param_grid, protocol=APP(val), n_jobs=-1, verbose=True).fit(train) diff --git a/examples/lequa2022_experiments.py b/examples/lequa2022_experiments.py index 41bc495..6b6225d 100644 --- a/examples/lequa2022_experiments.py +++ b/examples/lequa2022_experiments.py @@ -9,19 +9,37 @@ from method.aggregative import EMQ from model_selection import GridSearchQ import pandas as pd +""" +This example shows how to use the LeQua datasets (new in v0.1.7). For more information about the datasets, and the +LeQua competition itself, check: +https://lequa2022.github.io/index (the site of the competition) +https://ceur-ws.org/Vol-3180/paper-146.pdf (the overview paper) +""" +# there are 4 tasks (T1A, T1B, T2A, T2B) task = 'T1A' +# set the sample size in the environment. The sample size is task-dependent and can be consulted by doing: qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task] +qp.environ['N_JOBS'] = -1 + +# the fetch method returns a training set (an instance of LabelledCollection) and two generators: one for the +# validation set and another for the test sets. These generators are both instances of classes that extend +# AbstractProtocol (i.e., classes that implement sampling generation procedures) and, in particular, are instances +# of SamplesFromDir, a protocol that simply iterates over pre-generated samples (those provided for the competition) +# stored in a directory.
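+# A minimal sketch of how such a generator can be consumed (assuming the default return_type='sample_prev',
+# under which a protocol object is callable and yields (sample, prevalence) pairs):
+#
+#   for sample, prevalence in val_generator():
+#       ...  # one pre-generated validation sample per iteration, paired with its true prevalence vector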
training, val_generator, test_generator = fetch_lequa2022(task=task) # define the quantifier -learner = CalibratedClassifierCV(LogisticRegression()) -quantifier = EMQ(classifier=learner) +quantifier = EMQ(classifier=LogisticRegression()) # model selection -param_grid = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]} -model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, n_jobs=-1, refit=False, verbose=True) +param_grid = { + 'classifier__C': np.logspace(-3, 3, 7), # classifier-dependent: inverse of regularization strength + 'classifier__class_weight': ['balanced', None], # classifier-dependent: weights of each class + 'recalib': ['bcts', 'platt', None] # quantifier-dependent: recalibration method (new in v0.1.7) +} +model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', refit=False, verbose=True) quantifier = model_selection.fit(training) # evaluation diff --git a/examples/one_vs_all_example.py b/examples/one_vs_all.py similarity index 90% rename from examples/one_vs_all_example.py rename to examples/one_vs_all.py index 7488199..9e40551 100644 --- a/examples/one_vs_all_example.py +++ b/examples/one_vs_all.py @@ -12,6 +12,7 @@ and positive. We will use a one-vs-all approach using a binary quantifier for de """ qp.environ['SAMPLE_SIZE'] = 100 +qp.environ['N_JOBS'] = -1 """ Any binary quantifier can be turned into a single-label quantifier by means of the getOneVsAll function. @@ -21,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions during evaluation). """ -quantifier = getOneVsAll(MS2(LogisticRegression()), parallel_backend="loky") +quantifier = getOneVsAll(MS2(LogisticRegression())) print(f'the quantifier is an instance of {quantifier.__class__.__name__}') # load a ternary dataset @@ -38,8 +39,8 @@ param_grid = { 'binary_quantifier__classifier__class_weight': ['balanced', None] # classifier-dependent hyperparameter } print('starting model selection') -gs = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), n_jobs=-1, verbose=True, refit=False) -quantifier = gs.fit(train_modsel).best_model() +model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) +quantifier = model_selection.fit(train_modsel).best_model() print('training on the whole training set') train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 4b14d14..d531ce6 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -84,7 +84,5 @@ Change Log 0.1.7 Things to fix: -------------- - update unit tests -- update Wikis... - improve plots -- documentation of protocols is incomplete diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index e07f665..1f2b8ba 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1262,7 +1262,7 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier): is removed and no longer available at predict time.
""" - def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'): + def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='multiprocessing'): assert isinstance(binary_quantifier, BaseQuantifier), \ f'{self.binary_quantifier} does not seem to be a Quantifier' assert isinstance(binary_quantifier, AggregativeQuantifier), \ diff --git a/quapy/method/base.py b/quapy/method/base.py index 1803085..1aa64e2 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -54,13 +54,13 @@ class OneVsAll: pass -def getOneVsAll(binary_quantifier, n_jobs=None, parallel_backend='loky'): +def getOneVsAll(binary_quantifier, n_jobs=None): assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier): - return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs, parallel_backend) + return qp.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs) else: - return OneVsAllGeneric(binary_quantifier, n_jobs, parallel_backend) + return OneVsAllGeneric(binary_quantifier, n_jobs) class OneVsAllGeneric(OneVsAll,BaseQuantifier): @@ -69,7 +69,7 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier): quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1. """ - def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='loky'): + def __init__(self, binary_quantifier, n_jobs=None): assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier): @@ -77,7 +77,6 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier): f'you might prefer instantiating {qp.method.aggregative.OneVsAllAggregative.__name__}') self.binary_quantifier = binary_quantifier self.n_jobs = qp._get_njobs(n_jobs) - self.parallel_backend = parallel_backend def fit(self, data: LabelledCollection, fit_classifier=True): assert not data.binary, f'{self.__class__.__name__} expect non-binary data' @@ -89,7 +88,7 @@ class OneVsAllGeneric(OneVsAll,BaseQuantifier): def _parallel(self, func, *args, **kwargs): return np.asarray( - Parallel(n_jobs=self.n_jobs, backend=self.parallel_backend)( + Parallel(n_jobs=self.n_jobs, backend='threading')( delayed(func)(c, *args, **kwargs) for c in self.classes_ ) ) diff --git a/quapy/model_selection.py b/quapy/model_selection.py index b8b9282..84c9707 100644 --- a/quapy/model_selection.py +++ b/quapy/model_selection.py @@ -86,7 +86,8 @@ class GridSearchQ(BaseQuantifier): tinit = time() - hyper = [dict({k: values[i] for i, k in enumerate(params_keys)}) for values in itertools.product(*params_values)] + hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)] + self._sout(f'starting model selection with {self.n_jobs =}') #pass a seed to parallel so it is set in clild processes scores = qp.util.parallel( self._delayed_eval, diff --git a/quapy/protocol.py b/quapy/protocol.py index 1dec78b..70f4a48 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -45,13 +45,13 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): needed for extracting the samples, and :meth:`sample` that, given some parameters as input, deterministically generates a sample. - :param random_state: the seed for allowing to replicate any sequence of samples. 
Default is None, meaning that - the sequence will be different every time the protocol is called. + :param random_state: the seed for allowing to replicate any sequence of samples. Default is 0, meaning that + the sequence will be consistent every time the protocol is called. """ _random_state = -1 # means "not set" - def __init__(self, random_state=None): + def __init__(self, random_state=0): self.random_state = random_state @property @@ -82,6 +82,13 @@ class AbstractStochasticSeededProtocol(AbstractProtocol): ... def __call__(self): + """ + Yields one sample at a time. The type of object returned depends on the `collator` function. The + default behaviour returns tuples of the form `(sample, prevalence)`. + + :return: a tuple `(sample, prevalence)` if return_type='sample_prev', or an instance of + :class:`qp.data.LabelledCollection` if return_type='labelled_collection' + """ with ExitStack() as stack: if self.random_state == -1: raise ValueError('The random seed has never been initialized. ' @@ -96,13 +103,33 @@ class OnLabelledCollectionProtocol: + """ + Protocols that generate samples from a :class:`qp.data.LabelledCollection` object. + """ RETURN_TYPES = ['sample_prev', 'labelled_collection'] def get_labelled_collection(self): + """ + Returns the labelled collection on which this protocol acts. + + :return: an object of type :class:`qp.data.LabelledCollection` + """ return self.data def on_preclassified_instances(self, pre_classifications, in_place=False): + """ + Returns a copy of this protocol that acts on a modified version of the original + :class:`qp.data.LabelledCollection` in which the original instances have been replaced + with the outputs of a classifier for each instance. (This is convenient for speeding up + the evaluation procedures for many samples, by pre-classifying the instances in advance.) + + :param pre_classifications: the predictions issued by a classifier, typically an array-like + with shape `(n_instances,)` when the classifier is a hard one, or with shape + `(n_instances, n_classes)` when the classifier is a probabilistic one. + :param in_place: whether to apply the modification in place (True) or on a new copy (False, default).
+ :return: a copy of this protocol + """ assert len(pre_classifications) == len(self.data), \ f'error: the pre-classified data has different shape ' \ f'(expected {len(self.data)}, found {len(pre_classifications)})' @@ -115,6 +142,15 @@ class OnLabelledCollectionProtocol: @classmethod def get_collator(cls, return_type='sample_prev'): + """ + Returns a collator function, i.e., a function that prepares the yielded data. + + :param return_type: either 'sample_prev' (default) if the collator is requested to yield tuples of + `(sample, prevalence)`, or 'labelled_collection' when it is requested to yield instances of + :class:`qp.data.LabelledCollection` + :return: the collator function (a callable function that takes as input an instance of + :class:`qp.data.LabelledCollection`) + """ assert return_type in cls.RETURN_TYPES, \ f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}' if return_type=='sample_prev': @@ -139,13 +175,14 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): grid (default is 21) :param repeats: number of copies for each valid prevalence vector (default is 10) :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1 - :param random_state: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples + will be the same every time the protocol is called) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ def __init__(self, data:LabelledCollection, sample_size=None, n_prevalences=21, repeats=10, - smooth_limits_epsilon=0, random_state=None, return_type='sample_prev'): + smooth_limits_epsilon=0, random_state=0, return_type='sample_prev'): super(APP, self).__init__(random_state) self.data = data self.sample_size = qp._get_sample_size(sample_size) @@ -179,6 +216,11 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): return prevs def samples_parameters(self): + """ + Return all the necessary parameters to replicate the samples according to the APP protocol. + + :return: a list of indexes that realize the APP sampling + """ indexes = [] for prevs in self.prevalence_grid(): index = self.data.sampling_index(self.sample_size, *prevs) @@ -186,9 +228,20 @@ class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): return indexes def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ return self.data.sampling_from_index(index) def total(self): + """ + Returns the number of samples that will be generated. + + :return: int + """ return F.num_prevalence_combinations(self.n_prevalences, self.data.n_classes, self.repeats) @@ -201,12 +254,14 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. :param repeats: the number of samples to generate. Default is 100.
- :param random_state: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples + will be the same every time the protocol is called) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=None, return_type='sample_prev'): + def __init__(self, data:LabelledCollection, sample_size=None, repeats=100, random_state=0, + return_type='sample_prev'): super(NPP, self).__init__(random_state) self.data = data self.sample_size = qp._get_sample_size(sample_size) @@ -215,6 +270,11 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): + """ + Return all the necessary parameters to replicate the samples according to the NPP protocol. + + :return: a list of indexes that realize the NPP sampling + """ indexes = [] for _ in range(self.repeats): index = self.data.uniform_sampling_index(self.sample_size) @@ -222,9 +282,20 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): return indexes def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ return self.data.sampling_from_index(index) def total(self): + """ + Returns the number of samples that will be generated (equal to "repeats"). + + :return: int + """ return self.repeats @@ -241,12 +312,13 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. :param repeats: the number of samples to generate. Default is 100. - :param random_state: allows replicating samples across runs (default None) + :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples + will be the same every time the protocol is called) :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or to "labelled_collection" to get instead instances of LabelledCollection """ - def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=None, + def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev'): super(USimplexPP, self).__init__(random_state) self.data = data @@ -256,6 +328,11 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): + """ + Return all the necessary parameters to replicate the samples according to the USimplexPP protocol.
+ + :return: a list of indexes that realize the USimplexPP sampling + """ indexes = [] for prevs in F.uniform_simplex_sampling(n_classes=self.data.n_classes, size=self.repeats): index = self.data.sampling_index(self.sample_size, *prevs) @@ -263,9 +340,20 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) return indexes def sample(self, index): + """ + Realizes the sample given the index of the instances. + + :param index: indexes of the instances to select + :return: an instance of :class:`qp.data.LabelledCollection` + """ return self.data.sampling_from_index(index) def total(self): + """ + Returns the number of samples that will be generated (equal to "repeats"). + + :return: int + """ return self.repeats @@ -273,17 +361,19 @@ class DomainMixer(AbstractStochasticSeededProtocol): """ Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence. - :param domainA: - :param domainB: - :param sample_size: - :param repeats: + :param domainA: one domain, an object of :class:`qp.data.LabelledCollection` + :param domainB: another domain, an object of :class:`qp.data.LabelledCollection` + :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from + qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised. + :param repeats: int, number of samples to draw for every mixture rate :param prevalence: the prevalence to preserve along the mixtures. If specified, should be an array containing one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence will be taken from the domain A (default). :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself - :param random_state: + :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples + will be the same every time the protocol is called) """ def __init__( @@ -294,7 +384,7 @@ class DomainMixer(AbstractStochasticSeededProtocol): repeats=1, prevalence=None, mixture_points=11, - random_state=None, + random_state=0, return_type='sample_prev'): super(DomainMixer, self).__init__(random_state) self.A = domainA @@ -319,6 +409,11 @@ class DomainMixer(AbstractStochasticSeededProtocol): self.collator = OnLabelledCollectionProtocol.get_collator(return_type) def samples_parameters(self): + """ + Return all the necessary parameters to replicate the samples according to this protocol. + + :return: a list of zipped indexes (from A and B) that realize the sampling + """ indexesA, indexesB = [], [] for propA in self.mixture_points: for _ in range(self.repeats): @@ -331,12 +426,23 @@ class DomainMixer(AbstractStochasticSeededProtocol): return list(zip(indexesA, indexesB)) def sample(self, indexes): + """ + Realizes the sample given a pair of indexes of the instances from A and B.
+ + :param indexes: indexes of the instances to select from A and B + :return: an instance of :class:`qp.data.LabelledCollection` + """ indexesA, indexesB = indexes sampleA = self.A.sampling_from_index(indexesA) sampleB = self.B.sampling_from_index(indexesB) return sampleA+sampleB def total(self): + """ + Returns the number of samples that will be generated (equal to "repeats * mixture_points"). + + :return: int + """ return self.repeats * len(self.mixture_points) diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index e5d446e..c7e4b15 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -28,13 +28,27 @@ class TestProtocols(unittest.TestCase): self.assertEqual(samples1, samples2) - def test_app_not_replicate(self): - data = mock_labelled_collection() - p = APP(data, sample_size=5, n_prevalences=11) + p = APP(data, sample_size=5, n_prevalences=11) # <- random_state is by default set to 0 samples1 = samples_to_str(p) samples2 = samples_to_str(p) + self.assertEqual(samples1, samples2) + + def test_app_not_replicate(self): + data = mock_labelled_collection() + p = APP(data, sample_size=5, n_prevalences=11, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + p = APP(data, sample_size=5, n_prevalences=11, random_state=42) + samples1 = samples_to_str(p) + p = APP(data, sample_size=5, n_prevalences=11, random_state=0) + samples2 = samples_to_str(p) + self.assertNotEqual(samples1, samples2) def test_app_number(self): @@ -64,13 +78,26 @@ class TestProtocols(unittest.TestCase): self.assertEqual(samples1, samples2) - def test_npp_not_replicate(self): - data = mock_labelled_collection() - p = NPP(data, sample_size=5, repeats=5) + p = NPP(data, sample_size=5, repeats=5) # <- random_state is by default set to 0 samples1 = samples_to_str(p) samples2 = samples_to_str(p) + self.assertEqual(samples1, samples2) + + def test_npp_not_replicate(self): + data = mock_labelled_collection() + p = NPP(data, sample_size=5, repeats=5, random_state=None) + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertNotEqual(samples1, samples2) + + p = NPP(data, sample_size=5, repeats=5, random_state=42) + samples1 = samples_to_str(p) + p = NPP(data, sample_size=5, repeats=5, random_state=0) + samples2 = samples_to_str(p) self.assertNotEqual(samples1, samples2) def test_kraemer_replicate(self): @@ -82,9 +109,16 @@ class TestProtocols(unittest.TestCase): self.assertEqual(samples1, samples2) + p = USimplexPP(data, sample_size=5, repeats=10) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + def test_kraemer_not_replicate(self): data = mock_labelled_collection() - p = USimplexPP(data, sample_size=5, repeats=10) + p = USimplexPP(data, sample_size=5, repeats=10, random_state=None) samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -101,10 +135,17 @@ class TestProtocols(unittest.TestCase): self.assertEqual(samples1, samples2) + p = DomainMixer(dataA, dataB, sample_size=10, mixture_points=11) # <- random_state is by default set to 0 + + samples1 = samples_to_str(p) + samples2 = samples_to_str(p) + + self.assertEqual(samples1, samples2) + def test_covariate_shift_not_replicate(self): dataA = mock_labelled_collection('domA') dataB = mock_labelled_collection('domB') - p = DomainMixer(dataA, dataB, sample_size=10,
mixture_points=11, random_state=None) samples1 = samples_to_str(p) samples2 = samples_to_str(p) From 7b2d3cb7f1bc6ff69a60e15b156c9bc69730964e Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Sat, 11 Feb 2023 10:08:31 +0100 Subject: [PATCH 53/59] example using svmperf --- examples/one_vs_all_svmperf.py | 54 +++++++++++++++++++++++++++++++++ quapy/classification/svmperf.py | 3 ++ 2 files changed, 57 insertions(+) create mode 100644 examples/one_vs_all_svmperf.py diff --git a/examples/one_vs_all_svmperf.py b/examples/one_vs_all_svmperf.py new file mode 100644 index 0000000..8bf38bd --- /dev/null +++ b/examples/one_vs_all_svmperf.py @@ -0,0 +1,54 @@ +import quapy as qp +from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric, SVMQ +from quapy.method.base import getOneVsAll +from quapy.model_selection import GridSearchQ +from quapy.protocol import USimplexPP +from sklearn.linear_model import LogisticRegression +import numpy as np + +""" +In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral, +and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes. +""" + +qp.environ['SAMPLE_SIZE'] = 100 +qp.environ['N_JOBS'] = -1 +qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification' + +""" +Any binary quantifier can be turned into a single-label quantifier by means of the getOneVsAll function. +This function returns an instance of a OneVsAll quantifier: the subclass OneVsAllGeneric when the quantifier is +an instance of BaseQuantifier, or the subclass OneVsAllAggregative when the quantifier is an instance of +AggregativeQuantifier. Although OneVsAllGeneric works in all cases, using OneVsAllAggregative has +some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions +during evaluation). +""" +quantifier = getOneVsAll(SVMQ()) +print(f'the quantifier is an instance of {quantifier.__class__.__name__}') + +# load a ternary dataset +train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test + +""" +model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the +artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors +from the unit simplex. The priors are sampled using the Kraemer algorithm. Note that this is in contrast to the +standard APP protocol, which instead explores a pre-fixed grid of prevalence values.
+""" +param_grid = { + 'binary_quantifier__classifier__C': np.logspace(-2,2,5), # classifier-dependent hyperparameter +} +print('starting model selection') +model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) +quantifier = model_selection.fit(train_modsel).best_model() + +print('training on the whole training set') +train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test +quantifier.fit(train) + +# evaluation +mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') + +print(f'MAE = {mae:.4f}') + + diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py index 176b102..7921725 100644 --- a/quapy/classification/svmperf.py +++ b/quapy/classification/svmperf.py @@ -44,6 +44,9 @@ class SVMperf(BaseEstimator, ClassifierMixin): assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported' self.C = parameters['C'] + def get_params(self, deep=True): + return {'C': self.C} + def fit(self, X, y): """ Trains the SVM for the multivariate performance loss From 4c74ff02a3ae5831100d517d344501bf07698a56 Mon Sep 17 00:00:00 2001 From: Alex Moreo Date: Sat, 11 Feb 2023 10:47:27 +0100 Subject: [PATCH 54/59] svmperf in one-vs-all bugfix --- quapy/CHANGE_LOG.txt | 1 + quapy/method/aggregative.py | 40 ++++++++++++++++++++++++++++++------- quapy/util.py | 2 +- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index d531ce6..48cb586 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -85,4 +85,5 @@ Things to fix: -------------- - update unit tests - improve plots +- svmperf clean temp dirs; check also ELM when instantiated using SVMperf directly diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 1f2b8ba..295af03 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -880,16 +880,27 @@ class ELM(AggregativeQuantifier, BinaryQuantifier): learning algorithm, which has to be installed and patched for the purpose (see this `script `_). + :param classifier: an instance of `SVM perf` or None :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`) :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, loss='01', **kwargs): + def __init__(self, classifier=None, svmperf_base=None, loss='01', **kwargs): self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME'] self.loss = loss self.kwargs = kwargs - self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) + assert classifier is None or isinstance(classifier, SVMperf), \ + 'param error "classifier": instances of ELM can only be instantiated with classifier SVMperf. 
' \ + 'This parameter should either be an instance of SVMperf or None, in which case an SVMperf object ' \ + 'will be instantiaded using "svmperf_base" and "loss"' + if classifier is None: + self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) + else: + if classifier.loss != loss: + print(f'[warning]: the loss of the SVMperf object passed to arg "classifier" ({classifier.loss}) ' + f'does not coincide with arg "loss" ({loss}); the latter will be ignored') + self.classifier = classifier def fit(self, data: LabelledCollection, fit_classifier=True): self._check_binary(data, self.__class__.__name__) @@ -913,11 +924,14 @@ class SVMQ(ELM): >>> ELM(svmperf_base, loss='q', **kwargs) + :param classifier: not used, added for compatibility :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, **kwargs): + def __init__(self, classifier=None, svmperf_base=None, **kwargs): + assert classifier == None, \ + 'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.' super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs) @@ -929,11 +943,14 @@ class SVMKLD(ELM): >>> ELM(svmperf_base, loss='kld', **kwargs) + :param classifier: not used, added for compatibility :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, **kwargs): + def __init__(self, classifier=None, svmperf_base=None, **kwargs): + assert classifier == None, \ + 'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.' super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs) @@ -946,11 +963,14 @@ class SVMNKLD(ELM): >>> ELM(svmperf_base, loss='nkld', **kwargs) + :param classifier: not used, added for compatibility :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, **kwargs): + def __init__(self, classifier=None, svmperf_base=None, **kwargs): + assert classifier == None, \ + 'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.' super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs) @@ -962,11 +982,14 @@ class SVMAE(ELM): >>> ELM(svmperf_base, loss='mae', **kwargs) + :param classifier: not used, added for compatibility :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, **kwargs): + def __init__(self, classifier=None, svmperf_base=None, **kwargs): + assert classifier == None, \ + 'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.' super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs) @@ -978,11 +1001,14 @@ class SVMRAE(ELM): >>> ELM(svmperf_base, loss='mrae', **kwargs) + :param classifier: not used, added for compatibility :param svmperf_base: path to the folder containing the binary files of `SVM perf` :param kwargs: rest of SVM perf's parameters """ - def __init__(self, svmperf_base=None, **kwargs): + def __init__(self, classifier=None, svmperf_base=None, **kwargs): + assert classifier == None, \ + 'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.' 
        super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
 
diff --git a/quapy/util.py b/quapy/util.py
index 298f02a..5c01eae 100644
--- a/quapy/util.py
+++ b/quapy/util.py
@@ -38,7 +38,7 @@ def map_parallel(func, args, n_jobs):
     return list(itertools.chain.from_iterable(results))
 
 
-def parallel(func, args, n_jobs, seed = None):
+def parallel(func, args, n_jobs, seed=None):
     """
     A wrapper of multiprocessing:
 

From 505d2de823d21d877b08d001ead5c384a499de32 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo 
Date: Mon, 13 Feb 2023 12:01:52 +0100
Subject: [PATCH 55/59] elm examples

---
 examples/explicit_loss_minimization.py |  72 +++++++++
 examples/one_vs_all.py                 |   6 +-
 examples/one_vs_all_svmperf.py         |  54 ------
 quapy/classification/svmperf.py        |  47 +++---
 quapy/method/__init__.py               |  11 +-
 quapy/method/aggregative.py            | 214 +++++++++++++------------
 quapy/method/base.py                   |   2 +-
 7 files changed, 214 insertions(+), 192 deletions(-)
 create mode 100644 examples/explicit_loss_minimization.py
 delete mode 100644 examples/one_vs_all_svmperf.py

diff --git a/examples/explicit_loss_minimization.py b/examples/explicit_loss_minimization.py
new file mode 100644
index 0000000..cefbb3c
--- /dev/null
+++ b/examples/explicit_loss_minimization.py
@@ -0,0 +1,72 @@
+import quapy as qp
+from quapy.method.aggregative import newELM
+from quapy.method.base import newOneVsAll
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import USimplexPP
+
+"""
+In this example, we will show how to define a quantifier based on explicit loss minimization (ELM).
+ELM is a family of quantification methods relying on structured output learning. In particular, we will
+showcase how to instantiate SVM(Q) as proposed by `Barranquero et al. 2015
+`_, and SVM(KLD) and SVM(nKLD) as proposed by
+`Esuli et al. 2015 `_.
+
+All ELM quantifiers rely on SVMperf for optimizing a structured loss function (Q, KLD, or nKLD). Since these are
+not part of the original SVMperf package by Joachims, you have to first download the SVMperf package, apply the
+patch svm-perf-quantification-ext.patch (provided with the QuaPy library), and compile the sources.
+The script prepare_svmperf.sh does all the work. Simply run:
+
+>>> ./prepare_svmperf.sh
+
+Note that ELM quantifiers are nothing but a classify and count (CC) model instantiated with SVMperf as the
+underlying classifier. E.g., SVM(Q) comes down to:
+
+>>> CC(SVMperf(svmperf_base, loss='q'))
+
+This means that ELM quantifiers are aggregative (since CC is an aggregative quantifier). QuaPy provides some helper
+functions to simplify this; for example:
+
+>>> newSVMQ(svmperf_base)
+
+returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with SVMperf optimizing for Q).
+
+Since we want to explore the losses, we will instead use newELM. For this example we will create a quantifier for tweet
+sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary classifier,
+our quantifier will be binary as well. We will use a one-vs-all approach to work in the multiclass setting.
+For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
+""" + +qp.environ['SAMPLE_SIZE'] = 100 +qp.environ['N_JOBS'] = -1 +qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification' + +quantifier = newOneVsAll(newELM()) +print(f'the quantifier is an instance of {quantifier.__class__.__name__}') + +# load a ternary dataset +train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test + +""" +model selection: +We explore the classifier's loss and the classifier's C hyperparameters. +Since our model is actually an instance of OneVsAllAggregative, we need to add the prefix "binary_quantifier", and +since our binary quantifier is an instance of CC, we need to add the prefix "classifier". +""" +param_grid = { + 'binary_quantifier__classifier__loss': ['q', 'kld', 'mae'], # classifier-dependent hyperparameter + 'binary_quantifier__classifier__C': [0.01, 1, 100], # classifier-dependent hyperparameter +} +print('starting model selection') +model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) +quantifier = model_selection.fit(train_modsel).best_model() + +print('training on the whole training set') +train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test +quantifier.fit(train) + +# evaluation +mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') + +print(f'MAE = {mae:.4f}') + + diff --git a/examples/one_vs_all.py b/examples/one_vs_all.py index 9e40551..8aad376 100644 --- a/examples/one_vs_all.py +++ b/examples/one_vs_all.py @@ -1,6 +1,6 @@ import quapy as qp -from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric -from quapy.method.base import getOneVsAll +from quapy.method.aggregative import MS2 +from quapy.method.base import newOneVsAll from quapy.model_selection import GridSearchQ from quapy.protocol import USimplexPP from sklearn.linear_model import LogisticRegression @@ -22,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions during evaluation). """ -quantifier = getOneVsAll(MS2(LogisticRegression())) +quantifier = newOneVsAll(MS2(LogisticRegression())) print(f'the quantifier is an instance of {quantifier.__class__.__name__}') # load a ternary dataset diff --git a/examples/one_vs_all_svmperf.py b/examples/one_vs_all_svmperf.py deleted file mode 100644 index 8bf38bd..0000000 --- a/examples/one_vs_all_svmperf.py +++ /dev/null @@ -1,54 +0,0 @@ -import quapy as qp -from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric, SVMQ -from quapy.method.base import getOneVsAll -from quapy.model_selection import GridSearchQ -from quapy.protocol import USimplexPP -from sklearn.linear_model import LogisticRegression -import numpy as np - -""" -In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral, -and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes. -""" - -qp.environ['SAMPLE_SIZE'] = 100 -qp.environ['N_JOBS'] = -1 -qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification' - -""" -Any binary quantifier can be turned into a single-label quantifier by means of getOneVsAll function. -This function returns an instance of OneVsAll quantifier. 
Actually, it either returns the subclass OneVsAllGeneric -when the quantifier is an instance of BaseQuantifier, and it returns OneVsAllAggregative when the quantifier is -an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all cases, using OneVsAllAggregative has -some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions -during evaluation). -""" -quantifier = getOneVsAll(SVMQ()) -print(f'the quantifier is an instance of {quantifier.__class__.__name__}') - -# load a ternary dataset -train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test - -""" -model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the -artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors -from the unit simplex. The priors are sampled using the Kraemer algorithm. Note this is in contrast to the -standard APP protocol, that instead explores a prefixed grid of prevalence values. -""" -param_grid = { - 'binary_quantifier__classifier__C': np.logspace(-2,2,5), # classifier-dependent hyperparameter -} -print('starting model selection') -model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) -quantifier = model_selection.fit(train_modsel).best_model() - -print('training on the whole training set') -train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test -quantifier.fit(train) - -# evaluation -mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') - -print(f'MAE = {mae:.4f}') - - diff --git a/quapy/classification/svmperf.py b/quapy/classification/svmperf.py index 7921725..6c85084 100644 --- a/quapy/classification/svmperf.py +++ b/quapy/classification/svmperf.py @@ -1,5 +1,7 @@ import random +import shutil import subprocess +import tempfile from os import remove, makedirs from os.path import join, exists from subprocess import PIPE, STDOUT @@ -23,29 +25,34 @@ class SVMperf(BaseEstimator, ClassifierMixin): :param C: trade-off between training error and margin (default 0.01) :param verbose: set to True to print svm-perf std outputs :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae". + :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory + (temporal directories are automatically deleted) """ # losses with their respective codes in svm_perf implementation valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27} - def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01'): + def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None): assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path' self.svmperf_base = svmperf_base self.C = C self.verbose = verbose self.loss = loss + self.host_folder = host_folder - def set_params(self, **parameters): - """ - Set the hyper-parameters for svm-perf. 
Currently, only the `C` parameter is supported - - :param parameters: a `**kwargs` dictionary `{'C': }` - """ - assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported' - self.C = parameters['C'] - - def get_params(self, deep=True): - return {'C': self.C} + # def set_params(self, **parameters): + # """ + # Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported + # + # :param parameters: a `**kwargs` dictionary `{'C': }` + # """ + # assert sorted(list(parameters.keys())) == ['C', 'loss'], \ + # 'currently, only the C and loss parameters are supported' + # self.C = parameters.get('C', self.C) + # self.loss = parameters.get('loss', self.loss) + # + # def get_params(self, deep=True): + # return {'C': self.C, 'loss': self.loss} def fit(self, X, y): """ @@ -68,14 +75,14 @@ class SVMperf(BaseEstimator, ClassifierMixin): local_random = random.Random() # this would allow to run parallel instances of predict - random_code = '-'.join(str(local_random.randint(0,1000000)) for _ in range(5)) - # self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code) - # tmp dir are removed after the fit terminates in multiprocessing... moving to regular directories + __del__ - self.tmpdir = '.svmperf-' + random_code + random_code = 'svmperfprocess'+'-'.join(str(local_random.randint(0, 1000000)) for _ in range(5)) + if self.host_folder is None: + # tmp dir are removed after the fit terminates in multiprocessing... + self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code).name + else: + self.tmpdir = join(self.host_folder, '.' + random_code) makedirs(self.tmpdir, exist_ok=True) - # self.model = join(self.tmpdir.name, 'model-'+random_code) - # traindat = join(self.tmpdir.name, f'train-{random_code}.dat') self.model = join(self.tmpdir, 'model-'+random_code) traindat = join(self.tmpdir, f'train-{random_code}.dat') @@ -123,8 +130,6 @@ class SVMperf(BaseEstimator, ClassifierMixin): # in order to allow for parallel runs of predict, a random code is assigned local_random = random.Random() random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5)) - # predictions_path = join(self.tmpdir.name, 'predictions'+random_code+'.dat') - # testdat = join(self.tmpdir.name, 'test'+random_code+'.dat') predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat') testdat = join(self.tmpdir, 'test' + random_code + '.dat') dump_svmlight_file(X, y, testdat, zero_based=False) @@ -145,5 +150,5 @@ class SVMperf(BaseEstimator, ClassifierMixin): def __del__(self): if hasattr(self, 'tmpdir'): - pass # shutil.rmtree(self.tmpdir, ignore_errors=True) + shutil.rmtree(self.tmpdir, ignore_errors=True) diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py index 01c19bc..39205de 100644 --- a/quapy/method/__init__.py +++ b/quapy/method/__init__.py @@ -3,15 +3,6 @@ from . import base from . import meta from . 
import non_aggregative
 
-EXPLICIT_LOSS_MINIMIZATION_METHODS = {
-    aggregative.ELM,
-    aggregative.SVMQ,
-    aggregative.SVMAE,
-    aggregative.SVMKLD,
-    aggregative.SVMRAE,
-    aggregative.SVMNKLD
-}
-
 AGGREGATIVE_METHODS = {
     aggregative.CC,
     aggregative.ACC,
@@ -26,7 +17,7 @@ AGGREGATIVE_METHODS = {
     aggregative.MAX,
     aggregative.MS,
     aggregative.MS2,
-} | EXPLICIT_LOSS_MINIMIZATION_METHODS
+}
 
 
 NON_AGGREGATIVE_METHODS = {
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index 295af03..b872ba3 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -870,146 +870,155 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
         return r.x
 
 
-class ELM(AggregativeQuantifier, BinaryQuantifier):
+def newELM(svmperf_base=None, loss='01', C=1):
     """
-    Class of Explicit Loss Minimization (ELM) quantifiers.
+    Explicit Loss Minimization (ELM) quantifiers.
     Quantifiers based on ELM represent a family of methods based on structured output learning;
     these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
     measure. This implementation relies on
     `Joachims’ SVM perf `_ structured output
     learning algorithm, which has to be installed and patched for the purpose (see this
     `script `_).
+    This function is equivalent to:
 
-    :param classifier: an instance of `SVM perf` or None
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
+
+    >>> CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
     :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
-    :param kwargs: rest of SVM perf's parameters
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
-
-    def __init__(self, classifier=None, svmperf_base=None, loss='01', **kwargs):
-        self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME']
-        self.loss = loss
-        self.kwargs = kwargs
-        assert classifier is None or isinstance(classifier, SVMperf), \
-            'param error "classifier": instances of ELM can only be instantiated with classifier SVMperf. ' \
-            'This parameter should either be an instance of SVMperf or None, in which case an SVMperf object ' \
-            'will be instantiaded using "svmperf_base" and "loss"'
-        if classifier is None:
-            self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs)
-        else:
-            if classifier.loss != loss:
-                print(f'[warning]: the loss of the SVMperf object passed to arg "classifier" ({classifier.loss}) '
-                      f'does not coincide with arg "loss" ({loss}); the latter will be ignored')
-            self.classifier = classifier
-
-    def fit(self, data: LabelledCollection, fit_classifier=True):
-        self._check_binary(data, self.__class__.__name__)
-        assert fit_classifier, 'the method requires that fit_classifier=True'
-        self.classifier.fit(data.instances, data.labels)
-        return self
-
-    def aggregate(self, classif_predictions: np.ndarray):
-        return F.prevalence_from_labels(classif_predictions, self.classes_)
-
-    def classify(self, X, y=None):
-        return self.classifier.predict(X)
+    if svmperf_base is None:
+        svmperf_base = qp.environ['SVMPERF_HOME']
+    assert svmperf_base is not None, \
+        'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment'
+    return CC(SVMperf(svmperf_base, loss=loss, C=C))
 
 
-class SVMQ(ELM):
+def newSVMQ(svmperf_base=None, C=1):
     """
-    SVM(Q), which attempts to minimize the `Q` loss combining a classification-oriented loss and a
-    quantification-oriented loss, as proposed by
+    SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a
+    classification-oriented loss and a quantification-oriented loss, as proposed by
     `Barranquero et al. 2015 `_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='q', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='q', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf `_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script `_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='q', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
-
-
-class SVMKLD(ELM):
+def newSVMKLD(svmperf_base=None, C=1):
     """
-    SVM(KLD), which attempts to minimize the Kullback-Leibler Divergence as proposed by
+    SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    as proposed by
     `Esuli et al. 2015 `_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='kld', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf `_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script `_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='kld', C=C)
+
+
+def newSVMNKLD(svmperf_base=None, C=1):
+    """
+    SVM(NKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    normalized via the logistic function, as proposed by
+    `Esuli et al. 2015 `_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='kld', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf `_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script `_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='nkld', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
-
-
-class SVMNKLD(ELM):
+def newSVMAE(svmperf_base=None, C=1):
     """
-    SVM(NKLD), which attempts to minimize a version of the Kullback-Leibler Divergence normalized
-    via the logistic function, as proposed by
-    `Esuli et al. 2015 `_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='nkld', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by
+    `Moreo and Sebastiani, 2021 `_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='mae', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='mae', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf `_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script `_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='mae', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
-
-
-class SVMAE(ELM):
+def newSVMRAE(svmperf_base=None, C=1):
     """
-    SVM(AE), which attempts to minimize Absolute Error as first used by
-    `Moreo and Sebastiani, 2021 `_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='mae', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first
+    used by `Moreo and Sebastiani, 2021 `_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='mrae', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf `_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script `_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
- super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs) + return newELM(svmperf_base, loss='mrae', C=C) class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier): @@ -1267,7 +1276,6 @@ ProbabilisticAdjustedClassifyAndCount = PACC ExpectationMaximizationQuantifier = EMQ SLD = EMQ HellingerDistanceY = HDy -ExplicitLossMinimisation = ELM MedianSweep = MS MedianSweep2 = MS2 diff --git a/quapy/method/base.py b/quapy/method/base.py index 1aa64e2..e0363f1 100644 --- a/quapy/method/base.py +++ b/quapy/method/base.py @@ -54,7 +54,7 @@ class OneVsAll: pass -def getOneVsAll(binary_quantifier, n_jobs=None): +def newOneVsAll(binary_quantifier, n_jobs=None): assert isinstance(binary_quantifier, BaseQuantifier), \ f'{binary_quantifier} does not seem to be a Quantifier' if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier): From c6086474750657515582b5e0dfaed6d928d2bc99 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 13 Feb 2023 19:27:48 +0100 Subject: [PATCH 56/59] some bug fixes here and there --- docs/build/html/genindex.html | 36 +-- docs/build/html/objects.inv | Bin 2859 -> 2822 bytes docs/build/html/quapy.classification.html | 15 +- docs/build/html/quapy.html | 107 ++++--- docs/build/html/quapy.method.html | 342 ++++++++++------------ docs/build/html/searchindex.js | 2 +- examples/quanet_example.py | 35 +++ quapy/classification/neural.py | 6 +- quapy/data/base.py | 11 + quapy/data/preprocessing.py | 9 +- quapy/evaluation.py | 12 +- quapy/method/aggregative.py | 4 +- quapy/method/meta.py | 3 +- quapy/method/neural.py | 25 +- quapy/protocol.py | 30 +- quapy/tests/test_evaluation.py | 4 - quapy/tests/test_hierarchy.py | 1 - quapy/tests/test_labelcollection.py | 21 ++ quapy/tests/test_methods.py | 83 ++---- 19 files changed, 408 insertions(+), 338 deletions(-) create mode 100644 examples/quanet_example.py create mode 100644 quapy/tests/test_labelcollection.py diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index fc438e0..bc41b0c 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -106,8 +106,6 @@
  • (quapy.method.aggregative.DistributionMatching method)
  • (quapy.method.aggregative.DyS method) -
  • -
  • (quapy.method.aggregative.ELM method)
  • (quapy.method.aggregative.EMQ method)
  • @@ -198,8 +196,6 @@
  • (quapy.method.aggregative.AggregativeProbabilisticQuantifier method)
  • (quapy.method.aggregative.AggregativeQuantifier method) -
  • -
  • (quapy.method.aggregative.ELM method)
  • (quapy.method.aggregative.OneVsAllAggregative method)
  • @@ -283,8 +279,6 @@
  • EEMQ() (in module quapy.method.meta)
  • EHDy() (in module quapy.method.meta) -
  • -
  • ELM (class in quapy.method.aggregative)
  • EM() (quapy.method.aggregative.EMQ class method)
  • @@ -307,8 +301,6 @@
  • evaluation_report() (in module quapy.evaluation)
  • ExpectationMaximizationQuantifier (in module quapy.method.aggregative) -
  • -
  • ExplicitLossMinimisation (in module quapy.method.aggregative)
  • @@ -350,8 +342,6 @@
  • (quapy.method.aggregative.DistributionMatching method)
  • (quapy.method.aggregative.DyS method) -
  • -
  • (quapy.method.aggregative.ELM method)
  • (quapy.method.aggregative.EMQ method)
  • @@ -435,8 +425,6 @@
  • get_probability_distribution() (in module quapy.method.meta)
  • get_quapy_home() (in module quapy.util) -
  • -
  • getOneVsAll() (in module quapy.method.base)
  • getPteCondEstim() (quapy.method.aggregative.ACC class method) @@ -618,9 +606,21 @@
  • NBVSCalibration (class in quapy.classification.calibration)
  • NeuralClassifierTrainer (class in quapy.classification.neural) +
  • +
  • newELM() (in module quapy.method.aggregative) +
  • +
  • newOneVsAll() (in module quapy.method.base) +
  • +
  • newSVMAE() (in module quapy.method.aggregative)
  • 
diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv
index 3c30cfbd5de325c17c09e61fe66b287eec1d512e..e143639536e75db983281e157d741a9d03376ade 100644
GIT binary patch
[base85-encoded binary deltas omitted]

diff --git a/docs/build/html/quapy.classification.html b/docs/build/html/quapy.classification.html
index 8e2a6b9..f5684c6 100644
--- a/docs/build/html/quapy.classification.html
+++ b/docs/build/html/quapy.classification.html
@@ -801,7 +801,7 @@ applied, meaning that if the longest document in the batch is shorter than

    quapy.classification.svmperf

    -class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01')
    +class quapy.classification.svmperf.SVMperf(svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None)

    Bases: BaseEstimator, ClassifierMixin

    A wrapper for the SVM-perf package by Thorsten Joachims. When using losses for quantification, the source code has to be patched. See @@ -821,6 +821,8 @@ for further details.

  • C – trade-off between training error and margin (default 0.01)

  • verbose – set to True to print svm-perf std outputs

  • loss – the loss to optimize for. Available losses are “01”, “f1”, “kld”, “nkld”, “q”, “qacc”, “qf1”, “qgm”, “mae”, “mrae”.

  • +
  • host_folder – directory where to store the trained model; set to None (default) for using a tmp directory
+(temporary directories are automatically deleted)

  • @@ -873,17 +875,6 @@ instances in X

    -
    -
    -set_params(**parameters)
    -

    Set the hyper-parameters for svm-perf. Currently, only the C parameter is supported

    -
    -
    Parameters:
    -

    parameters – a **kwargs dictionary {‘C’: <float>}

    -
    -
    -
    -
    valid_losses = {'01': 0, 'f1': 1, 'kld': 12, 'mae': 26, 'mrae': 27, 'nkld': 13, 'q': 22, 'qacc': 23, 'qf1': 24, 'qgm': 25}
    diff --git a/docs/build/html/quapy.html b/docs/build/html/quapy.html index d72b33d..b522f38 100644 --- a/docs/build/html/quapy.html +++ b/docs/build/html/quapy.html @@ -550,13 +550,13 @@ in the grid multiplied by repeat

    sample(index)
    -

    Extract one sample determined by the given parameters

    +

    Realizes the sample given the index of the instances.

    Parameters:
    -

    params – all the necessary parameters to generate a sample

    +

    index – indexes of the instances to select

    Returns:
    -

    one sample (the same sample has to be generated for the same parameters)

    +

    an instance of qp.data.LabelledCollection

    @@ -564,10 +564,10 @@ in the grid multiplied by repeat

    samples_parameters()
    -

    This function has to return all the necessary parameters to replicate the samples

    +

    Return all the necessary parameters to replicate the samples according to the APP protocol.

    Returns:
    -

    a list of parameters, each of which serves to deterministically generate a sample

    +

    a list of indexes that realize the APP sampling

    @@ -575,10 +575,10 @@ in the grid multiplied by repeat

    total()
    -

    Indicates the total number of samples that the protocol generates.

    +

    Returns the number of samples that will be generated

    Returns:
    -

    The number of samples to generate if known, or None otherwise.

    +

    int

    @@ -666,10 +666,11 @@ the sequence will be consistent every time the protocol is called.

    Parameters:
      -
    • domainA

    • -
    • domainB

    • -
    • sample_size

    • -
    • repeats

    • +
    • domainA – one domain, an object of qp.data.LabelledCollection

    • +
    • domainB – another domain, an object of qp.data.LabelledCollection

    • +
    • sample_size – integer, the number of instances in each sample; if None (default) then it is taken from +qp.environ[“SAMPLE_SIZE”]. If this is not set, a ValueError exception is raised.

    • +
    • repeats – int, number of samples to draw for every mixture rate

    • prevalence – the prevalence to preserve along the mixtures. If specified, should be an array containing one
prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence will
be taken from the domain A (default).

    • @@ -684,13 +685,13 @@ will be the same every time the protocol is called)

      sample(indexes)
      -

      Extract one sample determined by the given parameters

      +

      Realizes the sample given a pair of indexes of the instances from A and B.

      Parameters:
      -

      params – all the necessary parameters to generate a sample

      +

      indexes – indexes of the instances to select from A and B

      Returns:
      -

      one sample (the same sample has to be generated for the same parameters)

      +

      an instance of qp.data.LabelledCollection

      @@ -698,10 +699,10 @@ will be the same every time the protocol is called)

      samples_parameters()
      -

      This function has to return all the necessary parameters to replicate the samples

      +

    Return all the necessary parameters to replicate the samples according to this protocol.

      Returns:
      -

      a list of parameters, each of which serves to deterministically generate a sample

      +

      a list of zipped indexes (from A and B) that realize the sampling

      @@ -709,10 +710,10 @@ will be the same every time the protocol is called)

      total()
      -

      Indicates the total number of samples that the protocol generates.

      +

      Returns the number of samples that will be generated (equals to “repeats * mixture_points”)

      Returns:
      -

      The number of samples to generate if known, or None otherwise.

      +

      int

      @@ -742,13 +743,13 @@ to “labelled_collection” to get instead instances of LabelledCollection

      <
      sample(index)
      -

      Extract one sample determined by the given parameters

      +

      Realizes the sample given the index of the instances.

      Parameters:
      -

      params – all the necessary parameters to generate a sample

      +

      index – indexes of the instances to select

      Returns:
      -

      one sample (the same sample has to be generated for the same parameters)

      +

      an instance of qp.data.LabelledCollection

      @@ -756,10 +757,10 @@ to “labelled_collection” to get instead instances of LabelledCollection

      <
      samples_parameters()
      -

      This function has to return all the necessary parameters to replicate the samples

      +

    Return all the necessary parameters to replicate the samples according to the NPP protocol.

      Returns:
      -

      a list of parameters, each of which serves to deterministically generate a sample

      +

      a list of indexes that realize the NPP sampling

      @@ -767,10 +768,10 @@ to “labelled_collection” to get instead instances of LabelledCollection

      <
      total()
      -

      Indicates the total number of samples that the protocol generates.

      +

      Returns the number of samples that will be generated (equals to “repeats”)

      Returns:
      -

      The number of samples to generate if known, or None otherwise.

      +

      int

      @@ -781,6 +782,7 @@ to “labelled_collection” to get instead instances of LabelledCollection

      <
      class quapy.protocol.OnLabelledCollectionProtocol

      Bases: object

      +

      Protocols that generate samples from a qp.data.LabelledCollection object.

      RETURN_TYPES = ['sample_prev', 'labelled_collection']
      @@ -789,17 +791,52 @@ to “labelled_collection” to get instead instances of LabelledCollection

      <
      classmethod get_collator(return_type='sample_prev')
      -
      +

      Returns a collator function, i.e., a function that prepares the yielded data

      +
      +
      Parameters:
      +

      return_type – either ‘sample_prev’ (default) if the collator is requested to yield tuples of +(sample, prevalence), or ‘labelled_collection’ when it is requested to yield instances of +qp.data.LabelledCollection

      +
      +
      Returns:
      +

      the collator function (a callable function that takes as input an instance of +qp.data.LabelledCollection)

      +
      +
      +
      get_labelled_collection()
      -
      +

      Returns the labelled collection on which this protocol acts.

      +
      +
      Returns:
      +

      an object of type qp.data.LabelledCollection

      +
      +
      +
    on_preclassified_instances(pre_classifications, in_place=False)
    -
    +

    Returns a copy of this protocol that acts on a modified version of the original +qp.data.LabelledCollection in which the original instances have been replaced +with the outputs of a classifier for each instance. (This is convenient for speeding-up +the evaluation procedures for many samples, by pre-classifying the instances in advance.)

    +
    +
    Parameters:
    +
      +
    • pre_classifications – the predictions issued by a classifier, typically an array-like +with shape (n_instances,) when the classifier is a hard one, or with shape +(n_instances, n_classes) when the classifier is a probabilistic one.

    • +
    • in_place – whether or not to apply the modification in-place or in a new copy (default).

    • +
    +
    +
    Returns:
    +

    a copy of this protocol
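
A sketch of the intended use (train, test, and the choice of NPP are illustrative placeholders):

>>> from sklearn.linear_model import LogisticRegression
>>> from quapy.protocol import NPP
>>> classifier = LogisticRegression().fit(*train.Xy)
>>> prot = NPP(test, sample_size=100, repeats=10, random_state=0)
>>> prot_pre = prot.on_preclassified_instances(classifier.predict_proba(test.instances))

The samples yielded by prot_pre then carry the posterior probabilities instead of the raw instances, so aggregative quantifiers can aggregate them directly without re-classifying.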

    +
    +
    +
    @@ -830,13 +867,13 @@ to “labelled_collection” to get instead instances of LabelledCollection

    <
    sample(index)
    -

    Extract one sample determined by the given parameters

    +

    Realizes the sample given the index of the instances.

    Parameters:
    -

    params – all the necessary parameters to generate a sample

    +

    index – indexes of the instances to select

    Returns:
    -

    one sample (the same sample has to be generated for the same parameters)

    +

    an instance of qp.data.LabelledCollection

    @@ -844,10 +881,10 @@ to “labelled_collection” to get instead instances of LabelledCollection

    <
    samples_parameters()
    -

    This function has to return all the necessary parameters to replicate the samples

    +

    Return all the necessary parameters to replicate the samples according to the USimplexPP protocol.

    Returns:
    -

    a list of parameters, each of which serves to deterministically generate a sample

    +

    a list of indexes that realize the USimplexPP sampling

    @@ -855,10 +892,10 @@ to “labelled_collection” to get instead instances of LabelledCollection

    <
    total()
    -

    Indicates the total number of samples that the protocol generates.

    +

    Returns the number of samples that will be generated (equals to “repeats”)

    Returns:
    -

    The number of samples to generate if known, or None otherwise.

    +

    int

    diff --git a/docs/build/html/quapy.method.html b/docs/build/html/quapy.method.html index 19a1e0b..4525456 100644 --- a/docs/build/html/quapy.method.html +++ b/docs/build/html/quapy.method.html @@ -458,76 +458,6 @@ learner has been trained outside the quantifier.

    -
    -
    -class quapy.method.aggregative.ELM(svmperf_base=None, loss='01', **kwargs)
    -

    Bases: AggregativeQuantifier, BinaryQuantifier

    -

    Class of Explicit Loss Minimization (ELM) quantifiers. -Quantifiers based on ELM represent a family of methods based on structured output learning; -these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss -measure. This implementation relies on -Joachims’ SVM perf structured output -learning algorithm, which has to be installed and patched for the purpose (see this -script).

    -
    -
    Parameters:
    -
    -
    -
    -
    -
    -aggregate(classif_predictions: ndarray)
    -

    Implements the aggregation of label predictions.

    -
    -
    Parameters:
    -

    classif_predictionsnp.ndarray of label predictions

    -
    -
    Returns:
    -

    np.ndarray of shape (n_classes,) with class prevalence estimates.

    -
    -
    -
    - -
    -
    -classify(X, y=None)
    -

    Provides the label predictions for the given instances. The predictions should respect the format expected by -aggregate(), i.e., posterior probabilities for probabilistic quantifiers, or crisp predictions for -non-probabilistic quantifiers

    -
    -
    Parameters:
    -

    instances – array-like

    -
    -
    Returns:
    -

    np.ndarray of shape (n_instances,) with label predictions

    -
    -
    -
    - -
    -
    -fit(data: LabelledCollection, fit_classifier=True)
    -

    Trains the aggregative quantifier

    -
    -
    Parameters:
    -
      -
    • data – a quapy.data.base.LabelledCollection consisting of the training data

    • -
    • fit_classifier – whether or not to train the learner (default is True). Set to False if the -learner has been trained outside the quantifier.

    • -
    -
    -
    Returns:
    -

    self

    -
    -
    -
    - -
    -
    class quapy.method.aggregative.EMQ(classifier: BaseEstimator, exact_train_prev=True, recalib=None)
    @@ -627,12 +557,6 @@ learner has been trained outside the quantifier.

    alias of EMQ

    -
    -
    -quapy.method.aggregative.ExplicitLossMinimisation
    -

    alias of ELM

    -
    -
    class quapy.method.aggregative.HDy(classifier: BaseEstimator, val_split=0.4)
    @@ -782,7 +706,7 @@ validation data, or as an integer, indicating that the misclassification rates s
    -class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +class quapy.method.aggregative.OneVsAllAggregative(binary_quantifier, n_jobs=None, parallel_backend='multiprocessing')

    Bases: OneVsAllGeneric, AggregativeQuantifier

    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the @@ -1029,108 +953,6 @@ learner has been trained outside the quantifier.

    -
    -
    -class quapy.method.aggregative.SVMAE(svmperf_base=None, **kwargs)
    -

    Bases: ELM

    -

    SVM(AE), which attempts to minimize Absolute Error as first used by -Moreo and Sebastiani, 2021. -Equivalent to:

    -
    >>> ELM(svmperf_base, loss='mae', **kwargs)
    -
    -
    -
    -
    Parameters:
    -
      -
    • svmperf_base – path to the folder containing the binary files of SVM perf

    • -
    • kwargs – rest of SVM perf’s parameters

    • -
    -
    -
    -
    - -
    -
    -class quapy.method.aggregative.SVMKLD(svmperf_base=None, **kwargs)
    -

    Bases: ELM

    -

    SVM(KLD), which attempts to minimize the Kullback-Leibler Divergence as proposed by -Esuli et al. 2015. -Equivalent to:

    -
    >>> ELM(svmperf_base, loss='kld', **kwargs)
    -
    -
    -
    -
    Parameters:
    -
      -
    • svmperf_base – path to the folder containing the binary files of SVM perf

    • -
    • kwargs – rest of SVM perf’s parameters

    • -
    -
    -
    -
    - -
    -
    -class quapy.method.aggregative.SVMNKLD(svmperf_base=None, **kwargs)
    -

    Bases: ELM

    -

    SVM(NKLD), which attempts to minimize a version of the the Kullback-Leibler Divergence normalized -via the logistic function, as proposed by -Esuli et al. 2015. -Equivalent to:

    -
    >>> ELM(svmperf_base, loss='nkld', **kwargs)
    -
    -
    -
    -
    Parameters:
    -
      -
    • svmperf_base – path to the folder containing the binary files of SVM perf

    • -
    • kwargs – rest of SVM perf’s parameters

    • -
    -
    -
    -
    - -
    -
    -class quapy.method.aggregative.SVMQ(svmperf_base=None, **kwargs)
    -

    Bases: ELM

    -

    SVM(Q), which attempts to minimize the Q loss combining a classification-oriented loss and a -quantification-oriented loss, as proposed by -Barranquero et al. 2015. -Equivalent to:

    -
    >>> ELM(svmperf_base, loss='q', **kwargs)
    -
    -
    -
    -
    Parameters:
    -
      -
    • svmperf_base – path to the folder containing the binary files of SVM perf

    • -
    • kwargs – rest of SVM perf’s parameters

    • -
    -
    -
    -
    - -
    -
    -class quapy.method.aggregative.SVMRAE(svmperf_base=None, **kwargs)
    -

    Bases: ELM

    -

    SVM(RAE), which attempts to minimize Relative Absolute Error as first used by -Moreo and Sebastiani, 2021. -Equivalent to:

    -
    >>> ELM(svmperf_base, loss='mrae', **kwargs)
    -
    -
    -
    -
    Parameters:
    -
      -
    • svmperf_base – path to the folder containing the binary files of SVM perf

    • -
    • kwargs – rest of SVM perf’s parameters

    • -
    -
    -
    -
    -
    class quapy.method.aggregative.T50(classifier: BaseEstimator, val_split=0.4)
    @@ -1247,6 +1069,162 @@ validation data, or as an integer, indicating that the misclassification rates s quapy.method.aggregative.cross_generate_predictions_depr(data, classifier, val_split, probabilistic, fit_classifier, method_name='')
    +
    +
    +quapy.method.aggregative.newELM(svmperf_base=None, loss='01', C=1)
    +

    Explicit Loss Minimization (ELM) quantifiers.
+Quantifiers based on ELM represent a family of methods based on structured output learning;
+these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+measure. This implementation relies on
+Joachims’ SVM perf structured output
+learning algorithm, which has to be installed and patched for the purpose (see this
+script).
+This function is equivalent to:

    +
    >>> CC(SVMperf(svmperf_base, loss, C))
    +
    +
    +
    +
    Parameters:
    +
      +
    • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ['SVMPERF_HOME']

    • +
    • loss – the loss to optimize (see quapy.classification.svmperf.SVMperf.valid_losses)

    • +
    • C – trade-off between training error and margin (default 1)

    • +
    +
    +
    Returns:
    +

    returns an instance of CC set to work with SVMperf (with loss and C set properly) as the underlying classifier

    +
    +
    +
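    A usage sketch, assuming SVM perf has been installed and patched via the script mentioned above, and that train and test form a training/test LabelledCollection pair; the path below is illustrative:

    >>> import quapy as qp
    >>> from quapy.method.aggregative import newELM
    >>> qp.environ['SVMPERF_HOME'] = './svm_perf_quantification'  # illustrative location of the patched binaries
    >>> quantifier = newELM(loss='kld', C=1)
    >>> quantifier.fit(train)
    >>> estim_prevalence = quantifier.quantify(test.instances)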
    + +
    +
    +quapy.method.aggregative.newSVMAE(svmperf_base=None, C=1)
    +

    SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for Absolute Error, as first used by Moreo and Sebastiani, 2021. Equivalent to:

    +
    >>> CC(SVMperf(svmperf_base, loss='mae', C=C))
    +
    +
    +

    Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

    +
    +
    Parameters:
    +
      +
    • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ['SVMPERF_HOME']

    • +
    • C – trade-off between training error and margin (default 1)

    • +
    +
    +
    Returns:
    +

    returns an instance of CC set to work with SVMperf (with loss and C set properly) as the underlying classifier

    +
    +
    +
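    A sketch combining this wrapper with QuaPy's error measures; train and test are assumed to be LabelledCollection objects, and qp.environ['SVMPERF_HOME'] is assumed to be set as described above:

    >>> import quapy as qp
    >>> from quapy.method.aggregative import newSVMAE
    >>> quantifier = newSVMAE()  # svmperf_base resolved from qp.environ['SVMPERF_HOME']
    >>> quantifier.fit(train)
    >>> estim_prev = quantifier.quantify(test.instances)
    >>> qp.error.ae(test.prevalence(), estim_prev)  # absolute error, the measure SVM(AE) is optimized for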
    + +
    +
    +quapy.method.aggregative.newSVMKLD(svmperf_base=None, C=1)
    +

    SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence normalized via the logistic function, as proposed by Esuli et al. 2015. Equivalent to:

    +
    >>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
    +
    +
    +

    Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

    +
    +
    Parameters:
    +
      +
    • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ['SVMPERF_HOME']

    • +
    • C – trade-off between training error and margin (default 1)

    • +
    +
    +
    Returns:
    +

    returns an instance of CC set to work with SVMperf (with loss and C set properly) as the underlying classifier

    +
    +
    +
    + +
    +
    +quapy.method.aggregative.newSVMQ(svmperf_base=None, C=1)
    +

    SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Q loss, combining a classification-oriented loss and a quantification-oriented loss, as proposed by Barranquero et al. 2015. Equivalent to:

    +
    >>> CC(SVMperf(svmperf_base, loss='q', C=C))
    +
    +
    +

    Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

    +
    +
    Parameters:
    +
      +
    • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ['SVMPERF_HOME']

    • +
    • C – trade-off between training error and margin (default 1)

    • +
    +
    +
    Returns:
    +

    returns an instance of CC set to work with SVMperf (with loss and C set properly) as the underlying classifier

    +
    +
    +
    + +
    +
    +quapy.method.aggregative.newSVMRAE(svmperf_base=None, C=1)
    +

    SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for Relative Absolute Error, as first used by Moreo and Sebastiani, 2021. Equivalent to:

    +
    >>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
    +
    +
    +

    Quantifiers based on ELM represent a family of methods based on structured output learning; these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss measure. This implementation relies on Joachims’ SVM perf structured output learning algorithm, which has to be installed and patched for the purpose (see this script). This function is a wrapper around CC(SVMperf(svmperf_base, loss, C)).

    +
    +
    Parameters:
    +
      +
    • svmperf_base – path to the folder containing the binary files of SVM perf; if set to None (default), this path will be obtained from qp.environ['SVMPERF_HOME']

    • +
    • C – trade-off between training error and margin (default 1)

    • +
    +
    +
    Returns:
    +

    returns an instance of CC set to work with SVMperf (with loss and C set properly) as the underlying classifier

    +
    +
    +
    +

    quapy.method.base

    @@ -1303,7 +1281,7 @@ validation data, or as an integer, indicating that the misclassification rates s
    -class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +class quapy.method.base.OneVsAllGeneric(binary_quantifier, n_jobs=None)

    Bases: OneVsAll, BaseQuantifier

    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.
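    A minimal sketch of the normalization step, with illustrative values: binary_estimates stands for the positive-class prevalence estimated by each per-class binary quantifier (the name is hypothetical).

    >>> import numpy as np
    >>> binary_estimates = np.asarray([0.2, 0.5, 0.4])
    >>> print(binary_estimates / binary_estimates.sum())  # l1-normalization: values now sum up to 1
    [0.18181818 0.45454545 0.36363636]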

    @@ -1343,8 +1321,8 @@ quantifier for each class, and then l1-normalizes the outputs so that the class
    -
    -quapy.method.base.getOneVsAll(binary_quantifier, n_jobs=None, parallel_backend='loky')
    +
    +quapy.method.base.newOneVsAll(binary_quantifier, n_jobs=None)
diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js
index 99c18d6..27108e2 100644
--- a/docs/build/html/searchindex.js
+++ b/docs/build/html/searchindex.js
@@ -1 +1 @@
"quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [9, "module-quapy.classification"], [9, "module-quapy.classification.calibration"], [9, "module-quapy.classification.methods"], [9, "module-quapy.classification.neural"], [9, "module-quapy.classification.svmperf"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, 
"quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[8, 
"quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "bctscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.BCTSCalibration"]], "cnnnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.CNNnet"]], "lstmnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.LSTMnet"]], "lowranklogisticregression (class in quapy.classification.methods)": [[9, "quapy.classification.methods.LowRankLogisticRegression"]], "nbvscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.NBVSCalibration"]], "neuralclassifiertrainer (class in quapy.classification.neural)": [[9, "quapy.classification.neural.NeuralClassifierTrainer"]], "recalibratedprobabilisticclassifier (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifier"]], "recalibratedprobabilisticclassifierbase (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase"]], "svmperf (class in quapy.classification.svmperf)": [[9, "quapy.classification.svmperf.SVMperf"]], "tscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.TSCalibration"]], "textclassifiernet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TextClassifierNet"]], "torchdataset (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TorchDataset"]], "vscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.VSCalibration"]], "asdataloader() (quapy.classification.neural.torchdataset method)": [[9, "quapy.classification.neural.TorchDataset.asDataloader"]], "classes_ (quapy.classification.calibration.recalibratedprobabilisticclassifierbase property)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.classes_"]], "decision_function() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.decision_function"]], "device (quapy.classification.neural.neuralclassifiertrainer property)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.device"]], "dimensions() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.dimensions"]], "document_embedding() (quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.document_embedding"]], "document_embedding() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.document_embedding"]], "document_embedding() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.document_embedding"]], "fit() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit"]], "fit() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.fit"]], "fit() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.fit"]], "fit() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.fit"]], "fit_cv() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": 
[[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv"]], "fit_tr_val() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val"]], "forward() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.forward"]], "get_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.get_params"]], "get_params() (quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.get_params"]], "get_params() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.get_params"]], "get_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.get_params"]], "get_params() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.get_params"]], "predict() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict"]], "predict() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict"]], "predict() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict"]], "predict() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.predict"]], "predict_proba() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba"]], "predict_proba() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict_proba"]], "predict_proba() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict_proba"]], "predict_proba() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.predict_proba"]], "quapy.classification": [[9, "module-quapy.classification"]], "quapy.classification.calibration": [[9, "module-quapy.classification.calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "reset_net_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.reset_net_params"]], "set_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.set_params"]], "set_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.set_params"]], "set_params() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.set_params"]], "training (quapy.classification.neural.cnnnet attribute)": [[9, "quapy.classification.neural.CNNnet.training"]], "training (quapy.classification.neural.lstmnet attribute)": [[9, "quapy.classification.neural.LSTMnet.training"]], "training 
(quapy.classification.neural.textclassifiernet attribute)": [[9, "quapy.classification.neural.TextClassifierNet.training"]], "transform() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.transform"]], "transform() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.transform"]], "valid_losses (quapy.classification.svmperf.svmperf attribute)": [[9, "quapy.classification.svmperf.SVMperf.valid_losses"]], "vocabulary_size (quapy.classification.neural.cnnnet property)": [[9, "quapy.classification.neural.CNNnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.lstmnet property)": [[9, "quapy.classification.neural.LSTMnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.textclassifiernet property)": [[9, "quapy.classification.neural.TextClassifierNet.vocabulary_size"]], "xavier_uniform() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.xavier_uniform"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], "fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class 
method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "mix() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.mix"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier 
(class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "elm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ELM"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "explicitlossminimisation (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExplicitLossMinimisation"]], "hdy (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule 
(class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "svmae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMAE"]], "svmkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMKLD"]], "svmnkld (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMNKLD"]], "svmq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMQ"]], "svmrae (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SVMRAE"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() (quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() 
(quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.elm method)": [[11, "quapy.method.aggregative.ELM.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getonevsall() (in module quapy.method.base)": [[11, "quapy.method.base.getOneVsAll"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": 
[[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["Datasets", "Evaluation", "Installation", "Methods", "Model-Selection", "Plotting", "index", "modules", "quapy", "quapy.classification", "quapy.data", "quapy.method"], "filenames": ["Datasets.md", "Evaluation.md", "Installation.rst", "Methods.md", "Model-Selection.md", "Plotting.md", "index.rst", "modules.rst", "quapy.rst", "quapy.classification.rst", "quapy.data.rst", "quapy.method.rst"], "titles": ["Datasets", "Evaluation", "Installation", "Quantification Methods", "Model Selection", "Plotting", "Welcome to QuaPy\u2019s documentation!", "quapy", "quapy package", "quapy.classification package", "quapy.data package", "quapy.method package"], "terms": {"quapi": [0, 1, 2, 3, 4, 5], "make": [0, 1, 3, 8, 11], "avail": [0, 1, 2, 3, 5, 6, 9, 11], "sever": [0, 10], "have": [0, 1, 2, 3, 4, 5, 8, 10, 11], "been": [0, 3, 4, 5, 8, 9, 10, 11], "us": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "quantif": [0, 1, 6, 8, 9, 10, 11], "literatur": [0, 1, 4, 6], "well": [0, 3, 4, 5, 11], "an": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "interfac": [0, 1, 11], "allow": [0, 1, 2, 3, 5, 8, 9, 10, 11], "anyon": 0, "import": [0, 1, 3, 4, 5, 6, 10, 11], "A": [0, 3, 8, 9, 10, 11], "object": [0, 8, 9, 10, 11], "i": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "roughli": 0, "pair": [0, 8], "labelledcollect": [0, 3, 4, 8, 10, 11], "one": [0, 1, 3, 4, 5, 8, 10, 11], "plai": 0, "role": 0, "train": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "set": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "anoth": 
[0, 1, 3, 5, 8, 10], "test": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "class": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "consist": [0, 4, 5, 8, 9, 10, 11], "iter": [0, 8, 11], "instanc": [0, 3, 4, 5, 6, 8, 9, 10, 11], "label": [0, 3, 4, 5, 6, 8, 9, 10, 11], "thi": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "handl": 0, "most": [0, 3, 5, 6, 8, 10, 11], "sampl": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "function": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11], "take": [0, 3, 5, 8, 10, 11], "look": [0, 1, 3, 5, 11], "follow": [0, 1, 3, 4, 5, 6, 8, 11], "code": [0, 3, 4, 5, 9], "qp": [0, 1, 3, 4, 5, 6, 8, 10, 11], "f": [0, 1, 3, 4, 5, 6, 10], "1st": 0, "posit": [0, 3, 5, 8, 10, 11], "document": [0, 1, 3, 5, 9, 10, 11], "2nd": 0, "onli": [0, 3, 5, 8, 9, 10, 11], "neg": [0, 5, 8, 11], "neutral": 0, "3rd": 0, "2": [0, 1, 3, 5, 8, 10, 11], "0": [0, 1, 3, 4, 5, 8, 9, 10, 11], "1": [0, 1, 3, 4, 5, 8, 9, 10, 11], "print": [0, 1, 3, 4, 6, 9, 10], "strprev": [0, 1, 8], "preval": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "prec": [0, 8], "output": [0, 1, 3, 4, 8, 9, 10, 11], "show": [0, 1, 3, 4, 5, 8, 9, 10, 11], "digit": 0, "precis": [0, 1, 8], "17": 0, "50": [0, 5, 8, 11], "33": [0, 5, 8], "One": [0, 1, 3, 11], "can": [0, 1, 2, 3, 4, 5, 8, 10, 11], "easili": [0, 2, 5, 9], "produc": [0, 1, 5, 8], "new": [0, 3, 8, 9, 10], "desir": [0, 1, 10], "sample_s": [0, 1, 3, 4, 5, 8, 11], "10": [0, 1, 4, 5, 8, 9, 11], "prev": [0, 1, 8, 10], "4": [0, 1, 3, 4, 5, 10, 11], "5": [0, 1, 3, 4, 5, 8, 9, 10, 11], "which": [0, 1, 3, 4, 5, 8, 9, 10, 11], "40": [0, 3, 4, 11], "made": [0, 2, 8, 10, 11], "across": [0, 1, 4, 5, 6, 8, 11], "differ": [0, 1, 3, 4, 5, 6, 8, 10, 11], "run": [0, 1, 2, 3, 4, 5, 8, 10, 11], "e": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "g": [0, 1, 3, 4, 6, 8, 10, 11], "method": [0, 1, 4, 5, 6, 8], "same": [0, 3, 5, 8, 10, 11], "exact": [0, 10], "retain": [0, 3, 9, 11], "index": [0, 3, 6, 8, 9, 10, 11], "gener": [0, 1, 3, 4, 5, 8, 9, 10, 11], "sampling_index": [0, 10], "sampling_from_index": [0, 10], "also": [0, 1, 2, 3, 5, 6, 8, 9], "implement": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "artifici": [0, 1, 3, 4, 5, 6, 8], "protocol": [0, 3, 4, 5, 6, 7, 10, 11], "via": [0, 2, 3, 8, 9, 11], "python": [0, 6], "": [0, 1, 3, 4, 5, 8, 9, 10, 11], "seri": [0, 10], "equidist": [0, 8], "rang": [0, 5, 8, 11], "entir": [0, 3, 4, 5, 8], "spectrum": [0, 1, 4, 5, 8], "simplex": [0, 8], "space": [0, 4, 8, 9], "artificial_sampling_gener": 0, "100": [0, 1, 3, 4, 5, 8, 9, 10, 11], "n_preval": [0, 8], "each": [0, 1, 3, 4, 5, 8, 9, 10, 11], "valid": [0, 1, 3, 4, 5, 8, 9, 10, 11], "combin": [0, 1, 4, 8, 11], "origin": [0, 3, 8, 10], "from": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "split": [0, 3, 4, 5, 8, 9, 10, 11], "point": [0, 1, 3, 8, 10], "25": [0, 5, 8, 9, 11], "75": [0, 5, 8], "00": [0, 1, 4], "see": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11], "evalu": [0, 3, 4, 5, 6, 7, 9, 10, 11], "wiki": [0, 3], "further": [0, 1, 3, 9, 10, 11], "detail": [0, 1, 3, 6, 9, 10, 11], "how": [0, 1, 3, 4, 5, 8, 10, 11], "properli": [0, 11], "three": [0, 5], "about": [0, 5, 8, 10], "kindl": [0, 1, 3, 5, 10, 11], "devic": [0, 3, 5, 9, 11], "harri": 0, "potter": 0, "known": [0, 3, 4, 8, 11], "imdb": [0, 5, 10], "movi": 0, "fetch": [0, 6], "unifi": [0, 11], "For": [0, 1, 5, 6, 8, 10], "exampl": [0, 1, 3, 4, 5, 8, 9, 10, 11], "fetch_review": [0, 1, 3, 4, 5, 10, 11], "These": [0, 9], "esuli": [0, 2, 3, 9, 10, 11], "moreo": [0, 3, 4, 10, 11], "sebastiani": [0, 3, 4, 10, 11], "2018": [0, 3, 10], "octob": [0, 3], "recurr": [0, 3, 10], "neural": [0, 8, 10], "network": [0, 8, 9, 10, 11], "In": [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 
11], "proceed": [0, 3, 10], "27th": [0, 3, 10], "acm": [0, 3, 10, 11], "intern": [0, 3, 9, 10], "confer": [0, 3, 9, 10], "inform": [0, 1, 3, 4, 8, 9, 10, 11], "knowledg": [0, 3, 10], "manag": [0, 3, 10], "pp": [0, 3, 9], "1775": [0, 3], "1778": [0, 3], "The": [0, 1, 2, 4, 5, 6, 8, 9, 10, 11], "list": [0, 5, 8, 9, 10, 11], "id": [0, 3, 10], "reviews_sentiment_dataset": [0, 10], "some": [0, 1, 3, 5, 8, 10, 11], "statist": [0, 1, 8, 11], "fhe": 0, "ar": [0, 1, 3, 4, 5, 8, 9, 10, 11], "summar": 0, "below": [0, 2, 3, 5, 8, 10], "size": [0, 1, 3, 8, 9, 10, 11], "type": [0, 3, 8, 10, 11], "hp": [0, 3, 4, 10], "9533": 0, "18399": 0, "018": 0, "982": 0, "065": 0, "935": 0, "text": [0, 3, 8, 9, 10, 11], "3821": [0, 10], "21591": [0, 10], "081": [0, 10], "919": [0, 10], "063": [0, 10], "937": [0, 10], "25000": 0, "500": [0, 1, 4, 5, 11], "11": [0, 1, 6, 8], "analysi": [0, 3, 6, 10], "access": [0, 3, 10, 11], "were": 0, "tf": [0, 10], "idf": 0, "format": [0, 5, 10, 11], "present": [0, 3, 10], "two": [0, 1, 3, 4, 5, 8, 10, 11], "val": [0, 9, 10], "model": [0, 1, 5, 6, 8, 9, 11], "select": [0, 3, 6, 8, 10, 11], "purpos": [0, 11], "exemplifi": 0, "load": [0, 3, 8, 10, 11], "fetch_twitt": [0, 3, 6, 10], "gasp": [0, 10], "for_model_select": [0, 10], "true": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "gao": [0, 3, 10, 11], "w": [0, 3, 10], "2015": [0, 2, 3, 9, 11], "august": 0, "tweet": [0, 3, 10], "classif": [0, 1, 3, 6, 8, 10, 11], "ieee": 0, "advanc": [0, 6, 8], "social": [0, 3, 10], "mine": [0, 3], "asonam": 0, "97": 0, "104": 0, "semeval13": [0, 10], "semeval14": [0, 10], "semeval15": [0, 10], "share": [0, 10], "semev": 0, "mean": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "would": [0, 1, 3, 5, 6, 10, 11], "get": [0, 1, 5, 8, 9, 10, 11], "when": [0, 1, 3, 4, 5, 8, 9, 10], "request": [0, 8, 10, 11], "ani": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "them": [0, 3, 10, 11], "consult": [0, 1], "twitter_sentiment_datasets_test": [0, 10], "9": [0, 1, 3, 5, 8], "replac": [0, 3, 8, 10], "twitter_sentiment_datasets_train": [0, 10], "found": [0, 3, 4, 8, 9, 10], "featur": [0, 10], "3": [0, 1, 3, 5, 6, 8, 9, 10, 11], "8788": 0, "3765": 0, "694582": 0, "421": 0, "496": 0, "082": [0, 1], "407": 0, "507": 0, "086": 0, "spars": [0, 10], "hcr": [0, 3, 10], "1594": 0, "798": 0, "222046": 0, "546": 0, "211": 0, "243": 0, "640": 0, "167": 0, "193": 0, "omd": [0, 10], "1839": 0, "787": 0, "199151": 0, "463": 0, "271": 0, "266": 0, "437": 0, "283": [0, 1], "280": 0, "sander": [0, 10], "2155": 0, "923": 0, "229399": 0, "161": 0, "691": 0, "148": 0, "164": [0, 3], "688": 0, "11338": 0, "3813": 0, "1215742": 0, "159": 0, "470": 0, "372": 0, "158": 0, "430": 0, "412": 0, "1853": 0, "109": 0, "361": 0, "530": 0, "2390": 0, "153": 0, "413": 0, "434": 0, "semeval16": [0, 6, 10], "8000": 0, "2000": 0, "889504": 0, "157": 0, "351": 0, "492": 0, "163": [0, 1], "341": 0, "497": 0, "sst": [0, 10], "2971": 0, "1271": 0, "376132": 0, "261": 0, "452": 0, "288": 0, "207": 0, "481": 0, "312": 0, "wa": [0, 3, 5, 8, 10, 11], "2184": 0, "936": 0, "248563": 0, "305": 0, "414": 0, "281": 0, "282": 0, "446": 0, "272": 0, "wb": [0, 10], "4259": 0, "1823": 0, "404333": 0, "270": 0, "392": 0, "337": 0, "274": 0, "335": 0, "32": [0, 6], "repositori": [0, 10], "p\u00e9rez": [0, 3, 10, 11], "g\u00e1llego": [0, 3, 10, 11], "p": [0, 3, 8, 9, 10, 11], "quevedo": [0, 3, 10], "j": [0, 3, 10, 11], "r": [0, 3, 8, 10], "del": [0, 3, 10], "coz": [0, 3, 10], "2017": [0, 3, 10, 11], "ensembl": [0, 6, 10, 11], "problem": [0, 3, 5, 8, 10, 11], "characteriz": [0, 3, 10], "chang": [0, 1, 3, 
10], "distribut": [0, 3, 5, 8, 10, 11], "case": [0, 1, 3, 4, 5, 8, 9, 10, 11], "studi": [0, 3, 10], "fusion": [0, 3, 10], "34": [0, 3, 10, 11], "87": [0, 3, 10], "doe": [0, 2, 3, 8, 11], "exactli": 0, "coincid": [0, 6], "et": [0, 2, 9, 10, 11], "al": [0, 2, 9, 10, 11], "sinc": [0, 1, 3, 5, 10, 11], "we": [0, 1, 3, 4, 5, 6, 10], "unabl": 0, "find": [0, 4, 11], "diabet": 0, "phonem": 0, "call": [0, 1, 5, 8, 10, 11], "fetch_ucidataset": [0, 3, 10], "yeast": [0, 10], "verbos": [0, 1, 4, 8, 9, 10, 11], "return": [0, 1, 3, 4, 5, 8, 9, 10, 11], "randomli": [0, 10], "drawn": [0, 1, 4, 8, 10], "stratifi": [0, 3, 9, 10, 11], "manner": [0, 9, 11], "whole": [0, 1, 3, 4, 8, 9], "collect": [0, 8, 9, 10], "70": 0, "30": [0, 1, 3, 11], "respect": [0, 1, 5, 8, 11], "option": [0, 1, 3, 5, 10, 11], "indic": [0, 1, 3, 4, 5, 8, 9, 10, 11], "descript": [0, 10], "should": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "standard": [0, 1, 5, 8, 9, 10, 11], "paper": [0, 3, 9, 11], "submit": 0, "kfcv": [0, 9, 10, 11], "order": [0, 2, 3, 5, 8, 10, 11], "accommod": 0, "practic": [0, 4], "could": [0, 1, 3, 4, 5, 6], "first": [0, 1, 2, 3, 5, 8, 10, 11], "instanti": [0, 1, 3, 4, 8, 9, 11], "creat": [0, 6, 8, 11], "time": [0, 1, 3, 8, 10, 11], "fetch_ucilabelledcollect": [0, 10], "nfold": [0, 8, 10], "nrepeat": [0, 10], "abov": [0, 3, 5, 8], "conduct": [0, 8], "2x5fcv": 0, "all": [0, 1, 2, 3, 5, 8, 9, 11], "come": [0, 8, 10, 11], "numer": [0, 1, 3, 6, 10, 11], "form": [0, 8, 10, 11], "dens": [0, 11], "matric": [0, 5, 10], "acut": 0, "120": 0, "6": [0, 1, 3, 5, 10], "508": 0, "b": [0, 8, 10, 11], "583": 0, "417": 0, "balanc": [0, 4, 11], "625": 0, "539": 0, "461": 0, "922": 0, "078": 0, "breast": 0, "cancer": 0, "683": 0, "350": 0, "650": 0, "cmc": 0, "1473": 0, "573": 0, "427": 0, "774": 0, "226": 0, "653": 0, "347": 0, "ctg": 0, "2126": 0, "22": [0, 3, 9, 10], "222": [0, 9], "778": 0, "861": 0, "139": 0, "917": 0, "083": 0, "german": 0, "1000": [0, 4, 11], "24": [0, 9], "300": [0, 1, 9], "700": 0, "haberman": [0, 3], "306": 0, "735": 0, "265": 0, "ionospher": 0, "641": 0, "359": 0, "iri": 0, "150": 0, "667": 0, "333": 0, "mammograph": 0, "830": 0, "514": 0, "486": 0, "pageblock": 0, "5473": 0, "979": 0, "021": 0, "semeion": 0, "1593": 0, "256": [0, 9], "901": 0, "099": 0, "sonar": 0, "208": 0, "60": 0, "534": 0, "466": 0, "spambas": 0, "4601": 0, "57": 0, "606": 0, "394": 0, "spectf": 0, "267": 0, "44": 0, "794": 0, "206": 0, "tictacto": 0, "958": 0, "transfus": 0, "748": 0, "762": 0, "238": 0, "wdbc": 0, "569": 0, "627": 0, "373": 0, "wine": 0, "178": 0, "13": [0, 9], "669": 0, "331": 0, "601": 0, "399": 0, "730": 0, "q": [0, 2, 3, 8, 9, 11], "red": 0, "1599": 0, "465": 0, "535": 0, "white": 0, "4898": 0, "665": 0, "1484": 0, "8": [0, 1, 5, 10, 11], "711": 0, "289": 0, "download": [0, 2, 3, 8, 10], "automat": [0, 1, 9], "thei": [0, 3, 11], "store": [0, 9, 10, 11], "quapy_data": [0, 8], "folder": [0, 10, 11], "faster": [0, 10], "reus": [0, 3, 8, 10], "howev": [0, 4, 5], "requir": [0, 1, 3, 6, 9], "special": [0, 5, 10], "action": 0, "moment": [0, 3], "fulli": [0, 8], "autom": [0, 3, 6], "cardiotocographi": 0, "excel": 0, "file": [0, 5, 8, 9, 10, 11], "user": [0, 1, 5], "instal": [0, 3, 6, 9, 11], "xlrd": [0, 2], "modul": [0, 1, 3, 5, 6, 7], "open": [0, 6, 10], "page": [0, 2, 6], "block": [0, 8], "need": [0, 3, 8, 10, 11], "unix": 0, "compress": 0, "extens": [0, 2, 5], "z": [0, 10], "directli": [0, 1, 3], "doabl": 0, "packag": [0, 2, 3, 6, 7], "like": [0, 1, 3, 5, 8, 9, 10, 11], "gzip": 0, "zip": [0, 5, 8], "uncompress": 0, "o": [0, 
8], "depend": [0, 1, 4, 5, 8, 11], "softwar": 0, "manual": 0, "do": [0, 1, 3, 4, 8, 9, 10, 11], "invok": [0, 1, 3, 8, 10], "provid": [0, 3, 5, 6, 10, 11], "loader": [0, 10], "simpl": [0, 3, 5, 11], "deal": 0, "t": [0, 1, 3, 8, 9, 11], "pre": [0, 3, 8], "n": [0, 1, 8, 9, 11], "second": [0, 1, 3, 5, 8, 10], "represent": [0, 3, 8, 9, 11], "col": [0, 10], "int": [0, 5, 8, 10, 11], "float": [0, 3, 8, 9, 10, 11], "charg": [0, 10], "classmethod": [0, 8, 10, 11], "def": [0, 1, 3, 5, 8], "cl": 0, "path": [0, 3, 5, 8, 9, 10, 11], "str": [0, 8, 10, 11], "loader_func": [0, 10], "callabl": [0, 8, 10, 11], "defin": [0, 3, 8, 9, 10, 11], "argument": [0, 1, 3, 5, 8, 10, 11], "initi": [0, 9, 11], "particular": [0, 1, 3, 11], "receiv": [0, 3, 5], "addition": 0, "number": [0, 1, 3, 5, 8, 9, 10, 11], "specifi": [0, 1, 3, 5, 8, 9, 10], "otherwis": [0, 3, 8, 10], "infer": [0, 10], "least": [0, 10], "pass": [0, 1, 5, 8, 9, 11], "along": [0, 3, 8, 11], "train_path": [0, 10], "my_data": 0, "dat": [0, 9], "test_path": [0, 10], "my_custom_load": 0, "rb": 0, "fin": 0, "preprocess": [0, 1, 3, 8, 11], "includ": [0, 1, 3, 5, 6, 10, 11], "text2tfidf": [0, 1, 3, 10], "tfidf": [0, 4, 5, 10], "vector": [0, 8, 9, 10, 11], "reduce_column": [0, 10], "reduc": [0, 10], "column": [0, 10], "base": [0, 3, 6, 8, 9], "term": [0, 1, 3, 4, 5, 6, 8, 9, 10, 11], "frequenc": [0, 10, 11], "transform": [0, 9, 10, 11], "valu": [0, 1, 3, 8, 9, 10, 11], "score": [0, 1, 4, 8, 9, 10], "subtract": [0, 8, 10], "normal": [0, 1, 3, 8, 10, 11], "deviat": [0, 1, 5, 8, 10], "so": [0, 1, 3, 5, 8, 9, 10, 11], "zero": [0, 8], "unit": [0, 8], "varianc": [0, 5], "textual": [0, 6, 10], "token": [0, 9, 10], "appeal": 1, "tool": [1, 6], "scenario": [1, 3, 4, 5, 6], "dataset": [1, 3, 4, 5, 6, 8, 9, 11], "shift": [1, 4, 6, 8, 9, 11], "particularli": 1, "prior": [1, 3, 4, 5, 6, 8, 11], "probabl": [1, 3, 4, 5, 6, 8, 9, 11], "That": [1, 4], "interest": [1, 5, 6, 8], "estim": [1, 3, 5, 6, 8, 9, 10, 11], "aris": 1, "under": 1, "belief": 1, "those": [1, 3, 4, 5, 8, 9, 11], "might": [1, 8, 10], "ones": [1, 3, 5, 8, 10, 11], "observ": [1, 11], "dure": [1, 5, 11], "other": [1, 3, 5, 6, 8, 10, 11], "word": [1, 3, 6, 9, 10, 11], "simpli": [1, 2, 3, 4, 5, 6, 8, 11], "predictor": 1, "assum": [1, 6, 11], "unlik": [1, 4, 8], "machin": [1, 4, 6, 9], "learn": [1, 2, 3, 4, 6, 8, 9, 10, 11], "govern": 1, "iid": [1, 5, 6], "assumpt": [1, 5, 6], "brief": [1, 10], "dedic": [1, 10], "explain": [1, 5], "here": [1, 11], "mae": [1, 4, 6, 8, 9, 11], "absolut": [1, 3, 5, 6, 8, 11], "mrae": [1, 6, 8, 9, 11], "rel": [1, 3, 8, 10, 11], "mse": [1, 3, 6, 8, 11], "squar": [1, 3, 8], "mkld": [1, 8, 11], "kullback": [1, 3, 8, 11], "leibler": [1, 3, 8, 11], "diverg": [1, 3, 8, 11], "mnkld": [1, 8, 11], "ae": [1, 2, 5, 8], "rae": [1, 2, 8], "se": [1, 8], "kld": [1, 2, 8, 9, 11], "nkld": [1, 2, 6, 8, 9, 11], "individu": [1, 3], "without": [1, 3, 8, 10], "averag": [1, 3, 8, 10, 11], "acc": [1, 3, 5, 6, 8, 11], "accuraci": [1, 5, 8, 11], "f1e": [1, 8], "f1": [1, 8, 9], "true_prev": [1, 5, 8], "prevs_hat": [1, 8], "ndarrai": [1, 3, 8, 10, 11], "contain": [1, 2, 3, 5, 8, 9, 10, 11], "smooth": [1, 8], "stabil": [1, 11], "third": [1, 5], "ep": [1, 8], "none": [1, 4, 8, 9, 10, 11], "paramet": [1, 3, 4, 8, 9, 10, 11], "epsilon": [1, 8, 11], "tradition": 1, "2t": [1, 8], "past": 1, "either": [1, 3, 8, 11], "environ": [1, 3, 4, 5, 8, 11], "variabl": [1, 3, 5, 8, 10], "onc": [1, 3, 5, 8, 10], "ommit": 1, "thereaft": 1, "recommend": [1, 5, 11], "np": [1, 3, 4, 5, 8, 10, 11], "asarrai": 1, "let": [1, 3, 11], 
"estim_prev": [1, 5, 8], "ae_": 1, "3f": [1, 6], "200": [1, 9], "600": 1, "914": 1, "final": [1, 3, 5, 11], "possibl": [1, 3, 8, 11], "string": [1, 8, 10, 11], "error_funct": 1, "from_nam": [1, 8], "accord": [1, 3, 4, 8, 9, 10, 11], "fix": [1, 4], "cover": [1, 4, 8, 9], "full": [1, 8], "contrast": 1, "natur": [1, 8], "despit": 1, "introduc": 1, "approxim": [1, 5, 8, 9], "preserv": [1, 5, 8], "procol": 1, "equal": [1, 8, 11], "distant": [1, 8], "interv": [1, 5, 8], "n_prevpoint": [1, 4, 5, 8], "determin": [1, 4, 5, 8], "constrain": [1, 5, 8, 10], "obtain": [1, 4, 8, 9, 11], "66": [1, 11], "given": [1, 3, 4, 8, 9, 10, 11], "num_prevalence_combin": [1, 8], "21": [1, 3, 5, 8], "n_class": [1, 3, 8, 9, 10, 11], "n_repeat": [1, 8], "1771": 1, "note": [1, 3, 4, 5, 8, 10], "last": [1, 3, 5, 8, 9, 10], "typic": [1, 4, 5, 8, 9, 10, 11], "singl": [1, 3, 6, 11], "higher": [1, 5], "comput": [1, 3, 5, 8, 11], "perform": [1, 3, 4, 5, 6, 8, 9, 11], "signific": 1, "instead": [1, 3, 4, 8, 10, 11], "work": [1, 3, 4, 5, 8, 10, 11], "wai": [1, 11], "around": [1, 10, 11], "maximum": [1, 8, 9, 11], "budg": 1, "close": [1, 10], "than": [1, 4, 5, 8, 9, 10], "budget": [1, 4], "achiev": [1, 3, 4, 5], "get_nprevpoints_approxim": [1, 8], "5000": [1, 5], "4960": 1, "cost": 1, "sometim": 1, "cumbersom": 1, "control": [1, 4, 8], "overal": 1, "experi": [1, 2, 3, 4, 5, 8], "rather": [1, 4], "By": [1, 3, 8], "avoid": [1, 8], "lead": [1, 10], "closer": 1, "surpass": 1, "script": [1, 2, 3, 6, 11], "pacc": [1, 3, 5, 8, 11], "reli": [1, 3, 8, 11], "logist": [1, 3, 9, 11], "regressor": [1, 3], "classifi": [1, 4, 5, 6, 8, 9, 11], "variou": [1, 5], "metric": [1, 3, 4, 6, 8, 11], "sklearn": [1, 3, 4, 5, 6, 9, 10, 11], "linear_model": [1, 3, 4, 6, 9], "logisticregress": [1, 3, 4, 6, 9, 11], "data": [1, 3, 4, 5, 6, 8, 9, 11], "min_df": [1, 3, 4, 5, 10, 11], "inplac": [1, 3, 10, 11], "lr": [1, 3, 9, 11], "aggreg": [1, 4, 5, 6, 8], "fit": [1, 3, 4, 5, 6, 8, 9, 10, 11], "df": 1, "artificial_sampling_report": 1, "mani": [1, 3, 4, 5, 6, 8, 11], "extract": [1, 8, 10], "categori": [1, 8], "n_repetit": [1, 4, 5], "n_job": [1, 3, 4, 8, 9, 10, 11], "parallel": [1, 3, 8, 9, 10, 11], "worker": [1, 8, 9, 10, 11], "cpu": [1, 9, 11], "random_se": [1, 8], "42": 1, "random": [1, 3, 4, 5, 8, 10], "seed": [1, 4, 8, 10], "replic": [1, 4, 8], "error_metr": [1, 4, 8], "line": [1, 3, 8], "result": [1, 2, 3, 4, 5, 6, 11], "report": 1, "panda": [1, 2], "datafram": 1, "displai": [1, 5, 8, 9], "just": [1, 3], "clearer": 1, "shown": [1, 5, 8], "convert": [1, 3, 8, 9, 10, 11], "repres": [1, 3, 5, 8, 10, 11], "decim": 1, "default": [1, 3, 8, 9, 10, 11], "pd": 1, "set_opt": 1, "expand_frame_repr": 1, "fals": [1, 3, 5, 8, 9, 10, 11], "map": [1, 9, 11], "000": 1, "000e": 1, "091": 1, "909": 1, "009": 1, "048": 1, "426e": 1, "04": 1, "837": 1, "037": 1, "114": 1, "633e": 1, "03": 1, "7": [1, 5, 8, 9, 11], "717": 1, "017": 1, "041": 1, "383e": 1, "366": 1, "634": 1, "034": 1, "070": 1, "412e": 1, "459": 1, "541": 1, "387e": 1, "565": 1, "435": 1, "035": 1, "073": 1, "535e": 1, "654": 1, "346": 1, "046": 1, "108": 1, "701e": 1, "725": 1, "275": 1, "075": 1, "235": 1, "515e": 1, "02": 1, "858": 1, "142": 1, "042": 1, "229": 1, "740e": 1, "945": 1, "055": 1, "27": [1, 3, 9], "357": 1, "219e": 1, "578": 1, "dtype": [1, 10], "float64": 1, "artificial_sampling_ev": [1, 4], "artificial_sampling_predict": [1, 5], "arrai": [1, 3, 5, 8, 9, 10, 11], "pip": 2, "older": 2, "version": [2, 8, 9], "scikit": [2, 3, 4, 8, 9, 10, 11], "numpi": [2, 4, 8, 9], "scipi": [2, 10], "pytorch": 
[2, 11], "quanet": [2, 6, 9, 11], "svmperf": [2, 3, 8, 11], "patch": [2, 3, 9, 11], "joblib": [2, 11], "tqdm": 2, "matplotlib": [2, 8], "involv": [2, 5, 8], "you": [2, 3], "appli": [2, 3, 4, 5, 8, 9, 10, 11], "ext": 2, "compil": [2, 3], "sourc": [2, 3, 6, 9], "prepare_svmperf": [2, 3], "sh": [2, 3], "job": 2, "directori": [2, 8, 9, 10, 11], "svm_perf_quantif": [2, 3], "optim": [2, 3, 4, 8, 9, 11], "measur": [2, 3, 4, 5, 6, 8, 11], "propos": [2, 3, 11], "barranquero": [2, 3, 9, 11], "extend": [2, 3, 8, 11], "former": [2, 11], "categor": [3, 10], "belong": [3, 11], "non": [3, 11], "group": 3, "though": [3, 8], "plan": 3, "add": [3, 4, 8, 10], "more": [3, 5, 11], "futur": 3, "character": [3, 6], "fact": [3, 5], "product": [3, 10], "quantifi": [3, 4, 5, 6, 8, 10, 11], "shoud": 3, "basequantifi": [3, 8, 11], "abstract": [3, 8, 9, 10, 11], "abstractmethod": 3, "self": [3, 8, 9, 10, 11], "set_param": [3, 8, 9, 11], "get_param": [3, 8, 9, 11], "deep": [3, 8, 11], "familiar": 3, "structur": [3, 11], "inspir": 3, "reason": [3, 5, 6], "why": 3, "ha": [3, 4, 5, 8, 9, 10, 11], "adopt": [3, 4, 10], "respond": 3, "predict": [3, 4, 5, 8, 9, 11], "input": [3, 5, 8, 9, 11], "element": [3, 10, 11], "while": [3, 5, 9, 10, 11], "selector": 3, "process": [3, 4, 8], "hyperparamet": [3, 8, 11], "search": [3, 4, 6, 8, 11], "part": [3, 10], "aggregativequantifi": [3, 11], "must": [3, 10, 11], "fit_learn": 3, "classif_predict": [3, 11], "mention": 3, "befor": [3, 8, 9, 10, 11], "inde": [3, 4], "alreadi": [3, 8, 11], "preclassifi": 3, "maintain": [3, 11], "through": [3, 8], "properti": [3, 8, 9, 10, 11], "learner": [3, 4, 9, 11], "extern": 3, "probabilist": [3, 8, 9, 11], "inherit": 3, "aggregativeprobabilisticquantifi": [3, 11], "posterior": [3, 8, 9, 11], "crisp": [3, 8, 11], "decis": [3, 8, 9, 11], "hard": [3, 8, 9], "classif_posterior": [3, 11], "posterior_prob": [3, 11], "advantag": [3, 11], "procedur": [3, 6, 8], "veri": [3, 5], "effici": 3, "everi": [3, 8, 11], "leverag": 3, "speed": [3, 8, 11], "up": [3, 4, 8, 9, 11], "over": [3, 4, 8], "customarili": [3, 4], "done": 3, "four": 3, "cc": [3, 5, 11], "simplest": 3, "deliv": [3, 11], "adjust": [3, 6, 8, 11], "pcc": [3, 4, 5, 11], "soft": 3, "serv": [3, 8, 10], "complet": [3, 5, 11], "equip": [3, 5], "svm": [3, 5, 6, 9, 10, 11], "linearsvc": [3, 5, 10], "pickl": [3, 8, 10, 11], "alia": [3, 8, 10, 11], "classifyandcount": [3, 11], "estim_preval": [3, 6, 11], "rate": [3, 8, 9, 11], "binari": [3, 5, 6, 8, 9, 10, 11], "init": 3, "addit": 3, "val_split": [3, 4, 9, 11], "integ": [3, 8, 9, 10, 11], "k": [3, 6, 8, 9, 10, 11], "fold": [3, 8, 10, 11], "cross": [3, 8, 9, 10, 11], "specif": [3, 4, 8], "held": [3, 4, 8, 9, 11], "out": [3, 4, 5, 8, 9, 10, 11], "postpon": 3, "constructor": 3, "prevail": 3, "overrid": 3, "illustr": [3, 4, 5], "seem": 3, "calibr": [3, 8], "calibratedclassifiercv": 3, "base_estim": 3, "cv": [3, 4], "predict_proba": [3, 9, 11], "As": [3, 4], "calibratedclassifi": 3, "except": [3, 8, 11], "rais": [3, 8, 11], "lastli": 3, "everyth": 3, "said": 3, "aboud": 3, "sld": [3, 11], "expectationmaximizationquantifi": [3, 11], "describ": [3, 8, 11], "saeren": [3, 11], "m": [3, 8, 11], "latinn": [3, 11], "decaesteck": [3, 11], "c": [3, 4, 8, 9, 10, 11], "2002": 3, "priori": 3, "14": 3, "41": 3, "attempt": 3, "although": [3, 4, 5, 11], "improv": [3, 8, 9, 11], "rank": [3, 9], "almost": 3, "alwai": [3, 4, 5, 11], "among": 3, "effect": 3, "carri": [3, 10, 11], "gonz\u00e1lez": 3, "castro": 3, "v": [3, 8, 9, 11], "alaiz": 3, "rodr\u0131": 3, "guez": 3, "alegr": 3, 
"2013": 3, "scienc": 3, "218": 3, "146": 3, "It": [3, 4, 5, 8], "allia": 3, "hellingerdistancei": [3, 11], "mixtur": [3, 8, 11], "previou": 3, "overridden": [3, 11], "proport": [3, 4, 9, 10, 11], "taken": [3, 8, 9, 10], "itself": [3, 8, 11], "accept": 3, "elm": [3, 11], "famili": [3, 11], "target": [3, 5, 6, 8, 9, 11], "orient": [3, 6, 8, 11], "joachim": [3, 9, 11], "svmq": 3, "d\u00edez": 3, "reliabl": 3, "pattern": 3, "recognit": 3, "48": 3, "591": 3, "604": 3, "svmkld": 3, "multivari": [3, 9], "transact": 3, "discoveri": 3, "articl": [3, 4], "svmnkld": 3, "svmae": 3, "error": [3, 4, 6, 7, 9, 11], "svmrae": 3, "what": 3, "nowadai": 3, "consid": [3, 5, 8, 9, 10, 11], "behav": [3, 5], "If": [3, 5, 8, 10, 11], "want": [3, 4], "custom": [3, 6, 10], "modifi": [3, 8], "assign": [3, 10], "Then": 3, "re": [3, 4, 9, 10], "thing": 3, "your": 3, "svmperf_hom": [3, 11], "valid_loss": [3, 9, 11], "mycustomloss": 3, "28": [3, 10], "current": [3, 8, 9, 10, 11], "support": [3, 6, 10, 11], "oper": 3, "trivial": 3, "strategi": [3, 4], "2016": [3, 10, 11], "sentiment": [3, 6, 10], "19": [3, 10], "onevsal": [3, 11], "know": 3, "where": [3, 5, 8, 9, 10, 11], "top": [3, 8, 11], "thu": [3, 4, 5, 8, 9, 11], "nor": 3, "castano": [3, 10], "2019": [3, 10, 11], "dynam": [3, 9, 10, 11], "task": [3, 4, 10], "45": [3, 5, 10], "15": [3, 8, 10], "polici": [3, 11], "processor": 3, "av": [3, 11], "ptr": [3, 11], "member": [3, 11], "d": [3, 11], "static": [3, 11], "red_siz": [3, 11], "pleas": 3, "check": [3, 4, 8], "offer": [3, 6], "torch": [3, 9, 11], "embed": [3, 9, 11], "lstm": [3, 9, 11], "cnn": [3, 11], "its": [3, 4, 8, 9, 11], "layer": [3, 9, 11], "neuralclassifiertrain": [3, 9, 11], "cnnnet": [3, 9, 11], "vocabulary_s": [3, 9, 10, 11], "cuda": [3, 9, 11], "supervis": [4, 6], "strongli": [4, 5], "good": [4, 5], "choic": [4, 11], "hyper": [4, 8, 9], "wherebi": 4, "chosen": [4, 8], "pick": 4, "best": [4, 8, 9, 11], "being": [4, 8, 11], "criteria": 4, "solv": [4, 11], "assess": 4, "own": 4, "right": [4, 8, 10], "impos": [4, 8], "aim": [4, 5], "appropri": 4, "configur": [4, 8], "design": 4, "long": [4, 9], "regard": 4, "next": [4, 8, 9, 10], "section": 4, "argu": 4, "alejandro": 4, "fabrizio": 4, "count": [4, 5, 6, 8, 10, 11], "arxiv": 4, "preprint": 4, "2011": 4, "02552": 4, "2020": [4, 9], "varieti": 4, "exhibit": [4, 5], "degre": 4, "model_select": [4, 7, 11], "gridsearchq": [4, 8, 11], "grid": [4, 8, 11], "explor": [4, 8], "portion": 4, "param_grid": [4, 8, 11], "logspac": [4, 11], "class_weight": [4, 11], "eval_budget": 4, "refit": [4, 8], "retrain": [4, 9], "goe": 4, "end": [4, 8, 11], "best_params_": 4, "best_model_": 4, "101": 4, "5f": 4, "system": [4, 11], "start": 4, "hyperparam": 4, "0001": [4, 11], "got": [4, 11], "24987": 4, "48135": 4, "001": [4, 9, 11], "24866": 4, "100000": 4, "43676": 4, "finish": 4, "param": [4, 8, 9, 11], "19982": 4, "develop": [4, 6], "1010": 4, "5005": 4, "54it": 4, "20342": 4, "altern": 4, "computation": 4, "costli": 4, "try": 4, "theoret": 4, "suboptim": 4, "opt": 4, "gridsearchcv": [4, 11], "10000": 4, "5379": 4, "55it": 4, "41734": 4, "wors": [4, 5, 8], "larg": 4, "between": [4, 5, 6, 8, 9, 11], "modal": 4, "turn": 4, "better": 4, "nonetheless": 4, "happen": [4, 5], "basic": [5, 11], "help": [5, 11], "analys": [5, 6], "outcom": 5, "main": 5, "method_nam": [5, 8, 11], "name": [5, 8, 9, 10, 11], "shape": [5, 8, 9, 10, 11], "correspond": [5, 10], "matrix": [5, 8, 11], "appear": 5, "occur": [5, 10], "merg": 5, "emq": [5, 11], "55": 5, "showcas": 5, "wide": 5, "variant": [5, 6, 8, 
11], "linear": [5, 8, 11], "review": [5, 6, 10], "step": [5, 8], "05": [5, 8, 11], "gen_data": 5, "base_classifi": 5, "yield": [5, 8, 10, 11], "tr_prev": [5, 8, 11], "append": 5, "__class__": 5, "__name__": 5, "insight": 5, "view": 5, "y": [5, 8, 9, 10, 11], "axi": [5, 8], "against": 5, "x": [5, 8, 9, 10, 11], "unfortun": 5, "limit": [5, 8, 11], "binary_diagon": [5, 8], "train_prev": [5, 8], "savepath": [5, 8], "bin_diag": 5, "png": 5, "save": [5, 8], "pdf": [5, 11], "cyan": 5, "dot": [5, 8], "color": [5, 8], "band": [5, 8], "hidden": [5, 9, 11], "show_std": [5, 8], "unadjust": 5, "bias": 5, "toward": [5, 10], "seen": [5, 8, 11], "evinc": 5, "box": [5, 8], "binary_bias_glob": [5, 8], "bin_bia": 5, "unbias": 5, "center": 5, "tend": 5, "overestim": 5, "high": [5, 8], "lower": [5, 11], "again": 5, "accordingli": 5, "20": [5, 8, 11], "90": [5, 8], "rewrit": 5, "method_data": 5, "training_preval": 5, "linspac": 5, "training_s": 5, "suffic": 5, "latex": 5, "syntax": 5, "_": [5, 8, 10], "now": 5, "clearli": 5, "binary_bias_bin": [5, 8], "broken": [5, 8], "down": [5, 8, 10], "bin": [5, 8, 11], "To": [5, 10], "nbin": [5, 8, 11], "isometr": [5, 8], "subinterv": 5, "interestingli": 5, "enough": 5, "seemingli": 5, "tendenc": 5, "low": [5, 8, 9], "underestim": 5, "beyond": 5, "67": [5, 8], "curios": 5, "pretti": 5, "discuss": 5, "analyz": 5, "compar": [5, 8], "both": [5, 10], "irrespect": [5, 11], "harder": 5, "interpret": [5, 6, 11], "error_by_drift": [5, 8], "error_nam": [5, 8], "n_bin": [5, 8, 11], "err_drift": 5, "whenev": [5, 8], "clear": 5, "lowest": 5, "difficult": 5, "rememb": 5, "solid": 5, "comparison": 5, "detriment": 5, "visual": [5, 6], "hide": 5, "framework": [6, 11], "written": 6, "root": 6, "concept": 6, "baselin": 6, "integr": 6, "commonli": 6, "facilit": 6, "twitter": [6, 10], "true_preval": 6, "hold": [6, 8, 11], "endeavour": [6, 8], "popular": 6, "expect": [6, 11], "maxim": [6, 11], "hdy": [6, 11], "versatil": 6, "etc": 6, "uci": [6, 10], "nativ": 6, "loss": [6, 9, 11], "perf": [6, 9, 11], "ad": 6, "meta": [6, 8], "plot": [6, 7], "diagon": [6, 8], "bia": [6, 8, 9, 11], "drift": 6, "api": 6, "subpackag": 7, "submodul": 7, "util": [7, 9, 10], "content": 7, "bctscalibr": 9, "nbvscalibr": 9, "recalibratedprobabilisticclassifi": 9, "recalibratedprobabilisticclassifierbas": 9, "classes_": [9, 10, 11], "fit_cv": 9, "fit_tr_val": 9, "tscalibr": 9, "vscalibr": 9, "lowranklogisticregress": 9, "document_embed": 9, "lstmnet": 9, "reset_net_param": 9, "textclassifiernet": 9, "dimens": [8, 9, 10, 11], "forward": [9, 11], "xavier_uniform": 9, "torchdataset": 9, "asdataload": 9, "decision_funct": 9, "splitstratifi": 10, "stat": 10, "train_test": 10, "xp": 10, "xy": 10, "split_random": 10, "split_stratifi": 10, "uniform_sampl": 10, "uniform_sampling_index": 10, "fetch_lequa2022": 10, "warn": 10, "indextransform": 10, "add_word": 10, "fit_transform": 10, "reader": 8, "binar": [8, 10], "from_csv": 10, "from_spars": 10, "from_text": 10, "reindex_label": 10, "getptecondestim": 11, "solve_adjust": 11, "adjustedclassifyandcount": 11, "distributionmatch": 11, "dy": 11, "em": 11, "max_it": 11, "explicitlossminimis": [], "max": 11, "ms2": 11, "mediansweep": 11, "mediansweep2": 11, "probabilisticadjustedclassifyandcount": 11, "probabilisticclassifyandcount": 11, "smm": 11, "t50": 11, "thresholdoptim": 11, "cross_generate_predict": 11, "cross_generate_predictions_depr": 11, "binaryquantifi": 11, "onevsallgener": 11, "eacc": 11, "ecc": 11, "eemq": 11, "ehdi": 11, "epacc": 11, "valid_polici": 11, 
"ensemblefactori": 11, "get_probability_distribut": 11, "quanetmodul": 11, "quanettrain": 11, "clean_checkpoint": 11, "clean_checkpoint_dir": 11, "mae_loss": 11, "non_aggreg": 8, "maximumlikelihoodprevalenceestim": 11, "absolute_error": 8, "hat": 8, "frac": 8, "mathcal": 8, "sum_": 8, "acc_error": 8, "y_true": 8, "y_pred": 8, "tp": 8, "tn": 8, "fp": 8, "fn": 8, "stand": [8, 11], "f1_error": 8, "macro": 8, "f_1": 8, "harmon": 8, "recal": 8, "2tp": 8, "independ": [8, 11], "err_nam": 8, "p_hat": 8, "d_": 8, "kl": 8, "log": [8, 10], "factor": 8, "beforehand": 8, "n_sampl": [8, 9], "mean_absolute_error": 8, "mean_relative_absolute_error": 8, "relative_absolute_error": 8, "underlin": 8, "displaystyl": 8, "abstractprotocol": 8, "union": [8, 10, 11], "aggr_speedup": 8, "auto": 8, "evaluation_report": 8, "app": [8, 11], "repeat": 8, "smooth_limits_epsilon": 8, "random_st": [8, 10], "return_typ": 8, "sample_prev": 8, "abstractstochasticseededprotocol": 8, "onlabelledcollectionprotocol": 8, "95": 8, "copi": [8, 10], "quantiti": 8, "labelled_collect": 8, "prevalence_grid": 8, "exhaust": 8, "sum": [8, 11], "implicit": 8, "return_constrained_dim": 8, "rest": [8, 9, 10], "quit": 8, "obvious": 8, "determinist": 8, "anywher": 8, "multipli": 8, "necessari": 8, "samples_paramet": 8, "total": 8, "parent": 8, "sequenc": 8, "enforc": 8, "collat": 8, "arg": [8, 10], "domainmix": 8, "domaina": 8, "domainb": 8, "mixture_point": 8, "domain": 8, "scale": [8, 9, 11], "npp": 8, "draw": 8, "uniformli": 8, "therefor": 8, "get_col": 8, "get_labelled_collect": 8, "on_preclassified_inst": 8, "pre_classif": 8, "in_plac": 8, "usimplexpp": 8, "kraemer": 8, "algorithm": [8, 11], "sens": 8, "guarante": [8, 10], "prefer": 8, "intract": 8, "hellingerdist": 8, "hellingh": 8, "distanc": [8, 11], "hd": [8, 11], "discret": [8, 11], "sqrt": 8, "p_i": 8, "q_i": 8, "real": [8, 9, 10, 11], "topsoedist": 8, "1e": [8, 9, 11], "topso": [8, 11], "adjusted_quantif": 8, "prevalence_estim": 8, "tpr": [8, 11], "fpr": [8, 11], "clip": 8, "exce": 8, "check_prevalence_vector": 8, "raise_except": 8, "toleranz": 8, "08": 8, "combinations_budget": 8, "largest": 8, "dimension": [8, 9, 10, 11], "repetit": 8, "less": [8, 10], "normalize_preval": 8, "l1": [8, 11], "calcul": 8, "binom": 8, "mass": 8, "alloc": [8, 9], "solut": 8, "star": 8, "bar": 8, "prevalence_from_label": 8, "n_instanc": [8, 9, 11], "correctli": 8, "even": 8, "len": 8, "prevalence_from_prob": 8, "bool": [8, 9, 11], "argmax": 8, "prevalence_linspac": 8, "01": [8, 9, 11], "separ": [8, 10], "99": 8, "uniform_prevalence_sampl": 8, "adapt": [8, 9], "post": 8, "http": [8, 10, 11], "stackexchang": 8, "com": 8, "question": 8, "3227": 8, "uniform": [8, 10], "uniform_simplex_sampl": 8, "dict": [8, 10, 11], "timeout": 8, "dictionari": [8, 9, 10, 11], "kei": [8, 10], "quantification_error": 8, "whether": [8, 9, 10, 11], "ignor": [8, 10, 11], "gen": 8, "establish": 8, "timer": 8, "longer": [8, 11], "timeouterror": 8, "bound": [8, 11], "stdout": 8, "best_model": 8, "after": [8, 11], "minim": [8, 11], "routin": [8, 10, 11], "unus": [8, 9], "contanin": 8, "cross_val_predict": 8, "akin": [8, 11], "issu": 8, "reproduc": [8, 10], "pos_class": [8, 10], "titl": 8, "colormap": 8, "listedcolormap": 8, "vertical_xtick": 8, "legend": 8, "local": 8, "sign": 8, "minu": 8, "classs": 8, "compon": [8, 9, 11], "cm": 8, "tab10": 8, "secondari": 8, "global": 8, "method_ord": 8, "henc": [8, 10], "conveni": 8, "multiclass": [8, 10, 11], "inconveni": 8, "leyend": 8, "hightlight": 8, "associ": 8, 
"brokenbar_supremacy_by_drift": 8, "isomer": 8, "x_error": 8, "y_error": 8, "ttest_alpha": 8, "005": 8, "tail_density_threshold": 8, "region": 8, "chart": 8, "condit": [8, 11], "ii": 8, "significantli": 8, "side": 8, "confid": 8, "percentil": 8, "divid": 8, "amount": 8, "similar": [8, 11], "threshold": [8, 11], "densiti": 8, "tail": 8, "discard": 8, "outlier": 8, "show_dens": 8, "show_legend": 8, "logscal": 8, "vline": 8, "especi": 8, "mai": 8, "cumberson": 8, "gain": 8, "understand": 8, "fare": 8, "regim": 8, "highlight": 8, "vertic": 8, "earlystop": 8, "patienc": [8, 9, 11], "lower_is_bett": 8, "earli": [8, 9, 11], "stop": [8, 9, 11], "epoch": [8, 9, 11], "best_epoch": 8, "best_scor": 8, "consecut": [8, 9, 11], "monitor": 8, "obtaind": 8, "far": [8, 9, 10], "flag": 8, "keep": 8, "track": 8, "boolean": [8, 10, 11], "create_if_not_exist": 8, "makedir": 8, "exist_ok": 8, "join": 8, "dir": [8, 11], "subdir": 8, "anotherdir": 8, "create_parent_dir": 8, "exist": 8, "txt": 8, "download_fil": 8, "url": 8, "archive_filenam": 8, "destin": 8, "filenam": 8, "download_file_if_not_exist": 8, "dowload": 8, "get_quapy_hom": 8, "home": [8, 10], "perman": 8, "map_parallel": 8, "func": 8, "slice": 8, "item": 8, "wrapper": [8, 9, 10, 11], "multiprocess": [8, 11], "delai": 8, "args_i": 8, "silent": [8, 11], "child": 8, "ensur": 8, "pickled_resourc": 8, "pickle_path": 8, "generation_func": 8, "fast": [8, 10], "resourc": 8, "some_arrai": 8, "mock": [8, 9], "rand": 8, "my_arrai": 8, "pkl": 8, "save_text_fil": 8, "disk": 8, "miss": 8, "temp_se": 8, "context": 8, "tempor": [8, 9], "outer": 8, "state": 8, "within": [8, 11], "get_njob": [], "correct": [9, 11], "temperatur": [9, 11], "bct": [9, 11], "abstent": 9, "alexandari": [9, 11], "afterward": [9, 11], "No": [9, 11], "nbv": [9, 11], "baseestim": [9, 11], "calibratorfactori": 9, "n_compon": 9, "kwarg": [9, 10, 11], "decomposit": 9, "truncatedsvd": 9, "princip": 9, "regress": 9, "n_featur": 9, "length": [9, 10], "eventu": [9, 10], "unalt": 9, "emb": 9, "embedding_s": 9, "hidden_s": 9, "repr_siz": 9, "kernel_height": 9, "stride": 9, "pad": [9, 10], "drop_p": 9, "convolut": 9, "vocabulari": [9, 10], "kernel": 9, "drop": 9, "dropout": [9, 11], "batch": 9, "dataload": 9, "tensor": 9, "n_dimens": 9, "lstm_class_nlay": 9, "short": 9, "memori": 9, "net": 9, "weight_decai": 9, "batch_siz": 9, "64": [9, 11], "batch_size_test": 9, "512": [9, 11], "padding_length": 9, "checkpointpath": 9, "checkpoint": [9, 11], "classifier_net": 9, "weight": [9, 10], "decai": 9, "wait": 9, "enabl": 9, "gpu": [9, 11], "vocab_s": 9, "reiniti": 9, "trainer": 9, "disjoint": 9, "embed_s": 9, "nn": 9, "pad_length": 9, "xavier": 9, "shuffl": [9, 10], "longest": 9, "shorter": 9, "svmperf_bas": [9, 11], "classifiermixin": 9, "thorsten": 9, "refer": [9, 10], "svm_perf_learn": 9, "svm_perf_classifi": 9, "trade": [9, 11], "off": [9, 11], "margin": [9, 11], "std": 9, "qacc": 9, "qf1": 9, "qgm": 9, "12": 9, "26": 9, "23": 9, "train_siz": 10, "conform": 10, "round": 10, "loader_kwarg": 10, "read": 10, "tupl": [8, 10, 11], "tr": 10, "te": 10, "csr": 10, "csr_matrix": 10, "4403": 10, "my_collect": 10, "codefram": 10, "larger": [10, 11], "actual": [10, 11], "empti": 10, "met": 10, "whose": [10, 11], "train_prop": 10, "left": [8, 10], "stratif": 10, "greater": 10, "dataset_nam": 10, "data_hom": 10, "test_split": 10, "predefin": 10, "uci_dataset": 10, "dump": 10, "leav": 10, "quay_data": 10, "ml": 10, "5fcvx2": 10, "x2": 10, "offici": 10, "lequa": 10, "competit": 10, "t1a": 10, "t1b": 10, "t2a": 10, "t2b": 10, 
"raw": 10, "merchandis": 10, "sperduti": 10, "2022": 10, "overview": 10, "clef": 10, "lequa2022_experi": 10, "py": 10, "guid": 10, "val_gen": 10, "test_gen": 10, "samplesfromdir": 10, "minimun": 10, "kept": 10, "subsequ": 10, "mining6": 10, "devel": 10, "style": 10, "countvector": 10, "keyword": [10, 11], "nogap": 10, "regardless": 10, "codifi": 10, "unknown": 10, "surfac": 10, "assert": 10, "gap": 10, "preced": 10, "decid": 10, "uniqu": 10, "rare": 10, "unk": 10, "minimum": [10, 11], "occurr": 10, "org": [10, 11], "stabl": 10, "feature_extract": 10, "html": 10, "subtyp": 10, "spmatrix": 10, "remov": [10, 11], "infrequ": 10, "aka": [10, 11], "sublinear_tf": 10, "scall": 10, "counter": 10, "tfidfvector": 10, "whcih": 10, "had": 10, "encod": 10, "utf": 10, "csv": 10, "feat1": 10, "feat2": 10, "featn": 10, "covari": 10, "express": 10, "row": 10, "class2int": 10, "collet": 10, "fomart": 10, "progress": 10, "sentenc": 10, "classnam": 10, "u1": 10, "misclassif": 11, "n_classes_": [], "fit_classifi": 11, "bypass": 11, "y_": 11, "ptecondestim": 11, "prevs_estim": 11, "ax": 11, "entri": 11, "y_i": 11, "y_j": 11, "_posterior_probabilities_": 11, "attribut": 11, "subclass": 11, "give": 11, "outsid": 11, "unless": 11, "noth": 11, "els": 11, "cdf": 11, "match": 11, "helling": 11, "sought": 11, "channel": 11, "proper": 11, "ch": 11, "di": 11, "dij": 11, "fraction": 11, "th": 11, "tol": 11, "ternari": 11, "dl": 11, "doi": 11, "1145": 11, "3219819": 11, "3220059": 11, "histogram": 11, "toler": 11, "explicit": 11, "exact_train_prev": 11, "recalib": 11, "updat": 11, "likelihood": [9, 11], "mutual": 11, "recurs": 11, "until": 11, "converg": 11, "suggest": 11, "recalibr": 11, "reach": 11, "loop": 11, "cumul": 11, "unlabel": 11, "latter": 11, "forman": 11, "2006": 11, "2008": 11, "goal": 11, "bring": 11, "denomin": 11, "median": 11, "sweep": 11, "binary_quantifi": 11, "prevel": 11, "emploi": 11, "resp": 11, "subobject": 11, "nest": 11, "pipelin": 11, "__": 11, "simplif": 11, "2021": 11, "equival": 11, "cosest": 11, "heurist": 11, "choos": 11, "ground": 11, "complement": 11, "param_mod_sel": 11, "param_model_sel": 11, "min_po": 11, "max_sample_s": 11, "closest": 11, "preliminari": 11, "recomput": 11, "compat": 11, "l": 11, "base_quantifier_class": 11, "factori": 11, "common": 11, "doc_embedding_s": 11, "stats_siz": 11, "lstm_hidden_s": 11, "lstm_nlayer": 11, "ff_layer": 11, "1024": 11, "bidirect": 11, "qdrop_p": 11, "order_bi": 11, "cell": 11, "connect": 11, "ff": 11, "sort": 11, "doc_embed": 11, "doc_posterior": 11, "recip": 11, "care": 11, "regist": 11, "hook": 11, "n_epoch": 11, "tr_iter_per_poch": 11, "va_iter_per_poch": 11, "checkpointdir": 11, "checkpointnam": 11, "phase": 11, "anyth": 11, "truth": 11, "mlpe": 11, "lazi": 11, "put": 11, "assumpion": 11, "beat": [9, 11], "estimant": 11, "kundaj": 9, "shrikumar": 9, "novemb": 9, "232": 9, "pmlr": 9, "outpu": [], "partit": 9, "ight": [], "valueerror": 8, "attach": 10, "mix": 10, "onevsallaggreg": 11, "parallel_backend": 11, "loki": 11, "backend": 11, "cannot": 11, "temp": 11, "getonevsal": [], "realiz": 8, "prepar": 8, "act": 8, "modif": 8, "place": 8, "host_fold": 9, "tmp": 9, "delet": 9, "newelm": 11, "underli": 11, "newsvma": 11, "newsvmkld": 11, "newsvmq": 11, "newsvmra": 11, "newonevsal": 11}, "objects": {"": [[8, 0, 0, "-", "quapy"]], "quapy": [[9, 0, 0, "-", "classification"], [10, 0, 0, "-", "data"], [8, 0, 0, "-", "error"], [8, 0, 0, "-", "evaluation"], [8, 0, 0, "-", "functional"], [11, 0, 0, "-", "method"], [8, 0, 0, "-", "model_selection"], [8, 
0, 0, "-", "plot"], [8, 0, 0, "-", "protocol"], [8, 0, 0, "-", "util"]], "quapy.classification": [[9, 0, 0, "-", "calibration"], [9, 0, 0, "-", "methods"], [9, 0, 0, "-", "neural"], [9, 0, 0, "-", "svmperf"]], "quapy.classification.calibration": [[9, 1, 1, "", "BCTSCalibration"], [9, 1, 1, "", "NBVSCalibration"], [9, 1, 1, "", "RecalibratedProbabilisticClassifier"], [9, 1, 1, "", "RecalibratedProbabilisticClassifierBase"], [9, 1, 1, "", "TSCalibration"], [9, 1, 1, "", "VSCalibration"]], "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase": [[9, 2, 1, "", "classes_"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "fit_cv"], [9, 3, 1, "", "fit_tr_val"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"]], "quapy.classification.methods": [[9, 1, 1, "", "LowRankLogisticRegression"]], "quapy.classification.methods.LowRankLogisticRegression": [[9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural": [[9, 1, 1, "", "CNNnet"], [9, 1, 1, "", "LSTMnet"], [9, 1, 1, "", "NeuralClassifierTrainer"], [9, 1, 1, "", "TextClassifierNet"], [9, 1, 1, "", "TorchDataset"]], "quapy.classification.neural.CNNnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.LSTMnet": [[9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "get_params"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"]], "quapy.classification.neural.NeuralClassifierTrainer": [[9, 2, 1, "", "device"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict"], [9, 3, 1, "", "predict_proba"], [9, 3, 1, "", "reset_net_params"], [9, 3, 1, "", "set_params"], [9, 3, 1, "", "transform"]], "quapy.classification.neural.TextClassifierNet": [[9, 3, 1, "", "dimensions"], [9, 3, 1, "", "document_embedding"], [9, 3, 1, "", "forward"], [9, 3, 1, "", "get_params"], [9, 3, 1, "", "predict_proba"], [9, 4, 1, "", "training"], [9, 2, 1, "", "vocabulary_size"], [9, 3, 1, "", "xavier_uniform"]], "quapy.classification.neural.TorchDataset": [[9, 3, 1, "", "asDataloader"]], "quapy.classification.svmperf": [[9, 1, 1, "", "SVMperf"]], "quapy.classification.svmperf.SVMperf": [[9, 3, 1, "", "decision_function"], [9, 3, 1, "", "fit"], [9, 3, 1, "", "predict"], [9, 4, 1, "", "valid_losses"]], "quapy.data": [[10, 0, 0, "-", "base"], [10, 0, 0, "-", "datasets"], [10, 0, 0, "-", "preprocessing"], [10, 0, 0, "-", "reader"]], "quapy.data.base": [[10, 1, 1, "", "Dataset"], [10, 1, 1, "", "LabelledCollection"]], "quapy.data.base.Dataset": [[10, 3, 1, "", "SplitStratified"], [10, 2, 1, "", "binary"], [10, 2, 1, "", "classes_"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 2, 1, "", "n_classes"], [10, 3, 1, "", "stats"], [10, 2, 1, "", "train_test"], [10, 2, 1, "", "vocabulary_size"]], "quapy.data.base.LabelledCollection": [[10, 2, 1, "", "X"], [10, 2, 1, "", "Xp"], [10, 2, 1, "", "Xy"], [10, 2, 1, "", "binary"], [10, 3, 1, "", "counts"], [10, 3, 1, "", "kFCV"], [10, 3, 1, "", "load"], [10, 3, 1, "", "mix"], [10, 2, 1, "", "n_classes"], [10, 2, 1, "", "p"], [10, 3, 1, "", "prevalence"], [10, 3, 1, "", "sampling"], [10, 3, 1, "", "sampling_from_index"], [10, 3, 1, "", "sampling_index"], [10, 3, 1, "", "split_random"], [10, 3, 1, "", "split_stratified"], [10, 3, 1, "", "stats"], [10, 3, 1, "", "uniform_sampling"], [10, 3, 1, "", "uniform_sampling_index"], [10, 2, 1, "", "y"]], 
"quapy.data.datasets": [[10, 5, 1, "", "fetch_UCIDataset"], [10, 5, 1, "", "fetch_UCILabelledCollection"], [10, 5, 1, "", "fetch_lequa2022"], [10, 5, 1, "", "fetch_reviews"], [10, 5, 1, "", "fetch_twitter"], [10, 5, 1, "", "warn"]], "quapy.data.preprocessing": [[10, 1, 1, "", "IndexTransformer"], [10, 5, 1, "", "index"], [10, 5, 1, "", "reduce_columns"], [10, 5, 1, "", "standardize"], [10, 5, 1, "", "text2tfidf"]], "quapy.data.preprocessing.IndexTransformer": [[10, 3, 1, "", "add_word"], [10, 3, 1, "", "fit"], [10, 3, 1, "", "fit_transform"], [10, 3, 1, "", "transform"], [10, 3, 1, "", "vocabulary_size"]], "quapy.data.reader": [[10, 5, 1, "", "binarize"], [10, 5, 1, "", "from_csv"], [10, 5, 1, "", "from_sparse"], [10, 5, 1, "", "from_text"], [10, 5, 1, "", "reindex_labels"]], "quapy.error": [[8, 5, 1, "", "absolute_error"], [8, 5, 1, "", "acc_error"], [8, 5, 1, "", "acce"], [8, 5, 1, "", "ae"], [8, 5, 1, "", "f1_error"], [8, 5, 1, "", "f1e"], [8, 5, 1, "", "from_name"], [8, 5, 1, "", "kld"], [8, 5, 1, "", "mae"], [8, 5, 1, "", "mean_absolute_error"], [8, 5, 1, "", "mean_relative_absolute_error"], [8, 5, 1, "", "mkld"], [8, 5, 1, "", "mnkld"], [8, 5, 1, "", "mrae"], [8, 5, 1, "", "mse"], [8, 5, 1, "", "nkld"], [8, 5, 1, "", "rae"], [8, 5, 1, "", "relative_absolute_error"], [8, 5, 1, "", "se"], [8, 5, 1, "", "smooth"]], "quapy.evaluation": [[8, 5, 1, "", "evaluate"], [8, 5, 1, "", "evaluation_report"], [8, 5, 1, "", "prediction"]], "quapy.functional": [[8, 5, 1, "", "HellingerDistance"], [8, 5, 1, "", "TopsoeDistance"], [8, 5, 1, "", "adjusted_quantification"], [8, 5, 1, "", "check_prevalence_vector"], [8, 5, 1, "", "get_nprevpoints_approximation"], [8, 5, 1, "", "normalize_prevalence"], [8, 5, 1, "", "num_prevalence_combinations"], [8, 5, 1, "", "prevalence_from_labels"], [8, 5, 1, "", "prevalence_from_probabilities"], [8, 5, 1, "", "prevalence_linspace"], [8, 5, 1, "", "strprev"], [8, 5, 1, "", "uniform_prevalence_sampling"], [8, 5, 1, "", "uniform_simplex_sampling"]], "quapy.method": [[11, 0, 0, "-", "aggregative"], [11, 0, 0, "-", "base"], [11, 0, 0, "-", "meta"], [11, 0, 0, "-", "neural"], [11, 0, 0, "-", "non_aggregative"]], "quapy.method.aggregative": [[11, 1, 1, "", "ACC"], [11, 4, 1, "", "AdjustedClassifyAndCount"], [11, 1, 1, "", "AggregativeProbabilisticQuantifier"], [11, 1, 1, "", "AggregativeQuantifier"], [11, 1, 1, "", "CC"], [11, 4, 1, "", "ClassifyAndCount"], [11, 1, 1, "", "DistributionMatching"], [11, 1, 1, "", "DyS"], [11, 1, 1, "", "EMQ"], [11, 4, 1, "", "ExpectationMaximizationQuantifier"], [11, 1, 1, "", "HDy"], [11, 4, 1, "", "HellingerDistanceY"], [11, 1, 1, "", "MAX"], [11, 1, 1, "", "MS"], [11, 1, 1, "", "MS2"], [11, 4, 1, "", "MedianSweep"], [11, 4, 1, "", "MedianSweep2"], [11, 1, 1, "", "OneVsAllAggregative"], [11, 1, 1, "", "PACC"], [11, 1, 1, "", "PCC"], [11, 4, 1, "", "ProbabilisticAdjustedClassifyAndCount"], [11, 4, 1, "", "ProbabilisticClassifyAndCount"], [11, 4, 1, "", "SLD"], [11, 1, 1, "", "SMM"], [11, 1, 1, "", "T50"], [11, 1, 1, "", "ThresholdOptimization"], [11, 1, 1, "", "X"], [11, 5, 1, "", "cross_generate_predictions"], [11, 5, 1, "", "cross_generate_predictions_depr"], [11, 5, 1, "", "newELM"], [11, 5, 1, "", "newSVMAE"], [11, 5, 1, "", "newSVMKLD"], [11, 5, 1, "", "newSVMQ"], [11, 5, 1, "", "newSVMRAE"]], "quapy.method.aggregative.ACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"], [11, 3, 1, "", "solve_adjustment"]], "quapy.method.aggregative.AggregativeProbabilisticQuantifier": 
[[11, 3, 1, "", "classify"]], "quapy.method.aggregative.AggregativeQuantifier": [[11, 3, 1, "", "aggregate"], [11, 2, 1, "", "classes_"], [11, 2, 1, "", "classifier"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.aggregative.CC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DistributionMatching": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.DyS": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.EMQ": [[11, 3, 1, "", "EM"], [11, 4, 1, "", "EPSILON"], [11, 4, 1, "", "MAX_ITER"], [11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "predict_proba"]], "quapy.method.aggregative.HDy": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.OneVsAllAggregative": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"]], "quapy.method.aggregative.PACC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "classify"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "getPteCondEstim"]], "quapy.method.aggregative.PCC": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.SMM": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.aggregative.ThresholdOptimization": [[11, 3, 1, "", "aggregate"], [11, 3, 1, "", "fit"]], "quapy.method.base": [[11, 1, 1, "", "BaseQuantifier"], [11, 1, 1, "", "BinaryQuantifier"], [11, 1, 1, "", "OneVsAll"], [11, 1, 1, "", "OneVsAllGeneric"], [11, 5, 1, "", "newOneVsAll"]], "quapy.method.base.BaseQuantifier": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.base.OneVsAllGeneric": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.method.meta": [[11, 5, 1, "", "EACC"], [11, 5, 1, "", "ECC"], [11, 5, 1, "", "EEMQ"], [11, 5, 1, "", "EHDy"], [11, 5, 1, "", "EPACC"], [11, 1, 1, "", "Ensemble"], [11, 5, 1, "", "ensembleFactory"], [11, 5, 1, "", "get_probability_distribution"]], "quapy.method.meta.Ensemble": [[11, 4, 1, "", "VALID_POLICIES"], [11, 2, 1, "", "aggregative"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 2, 1, "", "probabilistic"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.neural": [[11, 1, 1, "", "QuaNetModule"], [11, 1, 1, "", "QuaNetTrainer"], [11, 5, 1, "", "mae_loss"]], "quapy.method.neural.QuaNetModule": [[11, 2, 1, "", "device"], [11, 3, 1, "", "forward"], [11, 4, 1, "", "training"]], "quapy.method.neural.QuaNetTrainer": [[11, 2, 1, "", "classes_"], [11, 3, 1, "", "clean_checkpoint"], [11, 3, 1, "", "clean_checkpoint_dir"], [11, 3, 1, "", "fit"], [11, 3, 1, "", "get_params"], [11, 3, 1, "", "quantify"], [11, 3, 1, "", "set_params"]], "quapy.method.non_aggregative": [[11, 1, 1, "", "MaximumLikelihoodPrevalenceEstimation"]], "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation": [[11, 3, 1, "", "fit"], [11, 3, 1, "", "quantify"]], "quapy.model_selection": [[8, 1, 1, "", "GridSearchQ"], [8, 5, 1, "", "cross_val_predict"]], "quapy.model_selection.GridSearchQ": [[8, 3, 1, "", "best_model"], [8, 3, 1, "", "fit"], [8, 3, 1, "", "get_params"], [8, 3, 1, "", "quantify"], [8, 3, 1, "", "set_params"]], "quapy.plot": [[8, 5, 1, "", "binary_bias_bins"], [8, 5, 1, "", "binary_bias_global"], [8, 5, 1, "", "binary_diagonal"], [8, 5, 1, "", "brokenbar_supremacy_by_drift"], [8, 5, 1, "", "error_by_drift"]], "quapy.protocol": [[8, 1, 1, "", "APP"], [8, 1, 1, "", "AbstractProtocol"], [8, 1, 1, "", "AbstractStochasticSeededProtocol"], [8, 1, 1, "", 
"DomainMixer"], [8, 1, 1, "", "NPP"], [8, 1, 1, "", "OnLabelledCollectionProtocol"], [8, 1, 1, "", "USimplexPP"]], "quapy.protocol.APP": [[8, 3, 1, "", "prevalence_grid"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.AbstractProtocol": [[8, 3, 1, "", "total"]], "quapy.protocol.AbstractStochasticSeededProtocol": [[8, 3, 1, "", "collator"], [8, 2, 1, "", "random_state"], [8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"]], "quapy.protocol.DomainMixer": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.NPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.protocol.OnLabelledCollectionProtocol": [[8, 4, 1, "", "RETURN_TYPES"], [8, 3, 1, "", "get_collator"], [8, 3, 1, "", "get_labelled_collection"], [8, 3, 1, "", "on_preclassified_instances"]], "quapy.protocol.USimplexPP": [[8, 3, 1, "", "sample"], [8, 3, 1, "", "samples_parameters"], [8, 3, 1, "", "total"]], "quapy.util": [[8, 1, 1, "", "EarlyStop"], [8, 5, 1, "", "create_if_not_exist"], [8, 5, 1, "", "create_parent_dir"], [8, 5, 1, "", "download_file"], [8, 5, 1, "", "download_file_if_not_exists"], [8, 5, 1, "", "get_quapy_home"], [8, 5, 1, "", "map_parallel"], [8, 5, 1, "", "parallel"], [8, 5, 1, "", "pickled_resource"], [8, 5, 1, "", "save_text_file"], [8, 5, 1, "", "temp_seed"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:property", "3": "py:method", "4": "py:attribute", "5": "py:function"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "property", "Python property"], "3": ["py", "method", "Python method"], "4": ["py", "attribute", "Python attribute"], "5": ["py", "function", "Python function"]}, "titleterms": {"dataset": [0, 10], "review": 0, "twitter": 0, "sentiment": 0, "uci": 0, "machin": 0, "learn": 0, "issu": 0, "ad": 0, "custom": 0, "data": [0, 10], "process": 0, "evalu": [1, 8], "error": [1, 5, 8], "measur": 1, "protocol": [1, 8], "instal": 2, "requir": 2, "svm": 2, "perf": 2, "quantif": [2, 3, 4, 5], "orient": [2, 4], "loss": [2, 3, 4], "method": [3, 9, 11], "aggreg": [3, 11], "The": 3, "classifi": 3, "count": 3, "variant": 3, "expect": 3, "maxim": 3, "emq": 3, "helling": 3, "distanc": 3, "y": 3, "hdy": 3, "explicit": 3, "minim": 3, "meta": [3, 11], "model": [3, 4], "ensembl": 3, "quanet": 3, "neural": [3, 9, 11], "network": 3, "select": 4, "target": 4, "classif": [4, 9], "plot": [5, 8], "diagon": 5, "bia": 5, "drift": 5, "welcom": 6, "quapi": [6, 7, 8, 9, 10, 11], "": 6, "document": 6, "introduct": 6, "A": 6, "quick": 6, "exampl": 6, "featur": 6, "content": [6, 8, 9, 10, 11], "indic": 6, "tabl": 6, "packag": [8, 9, 10, 11], "subpackag": 8, "submodul": [8, 9, 10, 11], "function": 8, "model_select": 8, "util": 8, "modul": [8, 9, 10, 11], "calibr": 9, "svmperf": 9, "base": [10, 11], "preprocess": 10, "reader": 10, "non_aggreg": 11}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 57}, "alltitles": {"Datasets": [[0, "datasets"]], "Reviews Datasets": [[0, "reviews-datasets"]], "Twitter Sentiment Datasets": [[0, "twitter-sentiment-datasets"]], "UCI Machine Learning": [[0, "uci-machine-learning"]], "Issues:": [[0, "issues"]], "Adding Custom Datasets": [[0, 
"adding-custom-datasets"]], "Data Processing": [[0, "data-processing"]], "Evaluation": [[1, "evaluation"]], "Error Measures": [[1, "error-measures"]], "Evaluation Protocols": [[1, "evaluation-protocols"]], "Installation": [[2, "installation"]], "Requirements": [[2, "requirements"]], "SVM-perf with quantification-oriented losses": [[2, "svm-perf-with-quantification-oriented-losses"]], "Quantification Methods": [[3, "quantification-methods"]], "Aggregative Methods": [[3, "aggregative-methods"]], "The Classify & Count variants": [[3, "the-classify-count-variants"]], "Expectation Maximization (EMQ)": [[3, "expectation-maximization-emq"]], "Hellinger Distance y (HDy)": [[3, "hellinger-distance-y-hdy"]], "Explicit Loss Minimization": [[3, "explicit-loss-minimization"]], "Meta Models": [[3, "meta-models"]], "Ensembles": [[3, "ensembles"]], "The QuaNet neural network": [[3, "the-quanet-neural-network"]], "Model Selection": [[4, "model-selection"]], "Targeting a Quantification-oriented loss": [[4, "targeting-a-quantification-oriented-loss"]], "Targeting a Classification-oriented loss": [[4, "targeting-a-classification-oriented-loss"]], "Plotting": [[5, "plotting"]], "Diagonal Plot": [[5, "diagonal-plot"]], "Quantification bias": [[5, "quantification-bias"]], "Error by Drift": [[5, "error-by-drift"]], "Welcome to QuaPy\u2019s documentation!": [[6, "welcome-to-quapy-s-documentation"]], "Introduction": [[6, "introduction"]], "A quick example:": [[6, "a-quick-example"]], "Features": [[6, "features"]], "Contents:": [[6, null]], "Indices and tables": [[6, "indices-and-tables"]], "quapy": [[7, "quapy"]], "quapy package": [[8, "quapy-package"]], "Submodules": [[8, "submodules"], [9, "submodules"], [10, "submodules"], [11, "submodules"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.protocol": [[8, "quapy-protocol"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.util": [[8, "module-quapy.util"]], "Subpackages": [[8, "subpackages"]], "Module contents": [[8, "module-quapy"], [9, "module-quapy.classification"], [10, "module-quapy.data"], [11, "module-quapy.method"]], "quapy.classification package": [[9, "quapy-classification-package"]], "quapy.classification.calibration": [[9, "quapy-classification-calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "quapy.data package": [[10, "quapy-data-package"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "quapy.method package": [[11, "quapy-method-package"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]]}, "indexentries": {"app (class in quapy.protocol)": [[8, "quapy.protocol.APP"]], "abstractprotocol (class in quapy.protocol)": [[8, "quapy.protocol.AbstractProtocol"]], "abstractstochasticseededprotocol (class in quapy.protocol)": 
[[8, "quapy.protocol.AbstractStochasticSeededProtocol"]], "domainmixer (class in quapy.protocol)": [[8, "quapy.protocol.DomainMixer"]], "earlystop (class in quapy.util)": [[8, "quapy.util.EarlyStop"]], "gridsearchq (class in quapy.model_selection)": [[8, "quapy.model_selection.GridSearchQ"]], "hellingerdistance() (in module quapy.functional)": [[8, "quapy.functional.HellingerDistance"]], "npp (class in quapy.protocol)": [[8, "quapy.protocol.NPP"]], "onlabelledcollectionprotocol (class in quapy.protocol)": [[8, "quapy.protocol.OnLabelledCollectionProtocol"]], "return_types (quapy.protocol.onlabelledcollectionprotocol attribute)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.RETURN_TYPES"]], "topsoedistance() (in module quapy.functional)": [[8, "quapy.functional.TopsoeDistance"]], "usimplexpp (class in quapy.protocol)": [[8, "quapy.protocol.USimplexPP"]], "absolute_error() (in module quapy.error)": [[8, "quapy.error.absolute_error"]], "acc_error() (in module quapy.error)": [[8, "quapy.error.acc_error"]], "acce() (in module quapy.error)": [[8, "quapy.error.acce"]], "adjusted_quantification() (in module quapy.functional)": [[8, "quapy.functional.adjusted_quantification"]], "ae() (in module quapy.error)": [[8, "quapy.error.ae"]], "best_model() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.best_model"]], "binary_bias_bins() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_bins"]], "binary_bias_global() (in module quapy.plot)": [[8, "quapy.plot.binary_bias_global"]], "binary_diagonal() (in module quapy.plot)": [[8, "quapy.plot.binary_diagonal"]], "brokenbar_supremacy_by_drift() (in module quapy.plot)": [[8, "quapy.plot.brokenbar_supremacy_by_drift"]], "check_prevalence_vector() (in module quapy.functional)": [[8, "quapy.functional.check_prevalence_vector"]], "collator() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.collator"]], "create_if_not_exist() (in module quapy.util)": [[8, "quapy.util.create_if_not_exist"]], "create_parent_dir() (in module quapy.util)": [[8, "quapy.util.create_parent_dir"]], "cross_val_predict() (in module quapy.model_selection)": [[8, "quapy.model_selection.cross_val_predict"]], "download_file() (in module quapy.util)": [[8, "quapy.util.download_file"]], "download_file_if_not_exists() (in module quapy.util)": [[8, "quapy.util.download_file_if_not_exists"]], "error_by_drift() (in module quapy.plot)": [[8, "quapy.plot.error_by_drift"]], "evaluate() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluate"]], "evaluation_report() (in module quapy.evaluation)": [[8, "quapy.evaluation.evaluation_report"]], "f1_error() (in module quapy.error)": [[8, "quapy.error.f1_error"]], "f1e() (in module quapy.error)": [[8, "quapy.error.f1e"]], "fit() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.fit"]], "from_name() (in module quapy.error)": [[8, "quapy.error.from_name"]], "get_collator() (quapy.protocol.onlabelledcollectionprotocol class method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_collator"]], "get_labelled_collection() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection"]], "get_nprevpoints_approximation() (in module quapy.functional)": [[8, "quapy.functional.get_nprevpoints_approximation"]], "get_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.get_params"]], "get_quapy_home() (in module 
quapy.util)": [[8, "quapy.util.get_quapy_home"]], "kld() (in module quapy.error)": [[8, "quapy.error.kld"]], "mae() (in module quapy.error)": [[8, "quapy.error.mae"]], "map_parallel() (in module quapy.util)": [[8, "quapy.util.map_parallel"]], "mean_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_absolute_error"]], "mean_relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.mean_relative_absolute_error"]], "mkld() (in module quapy.error)": [[8, "quapy.error.mkld"]], "mnkld() (in module quapy.error)": [[8, "quapy.error.mnkld"]], "module": [[8, "module-quapy"], [8, "module-quapy.error"], [8, "module-quapy.evaluation"], [8, "module-quapy.functional"], [8, "module-quapy.model_selection"], [8, "module-quapy.plot"], [8, "module-quapy.protocol"], [8, "module-quapy.util"], [9, "module-quapy.classification"], [9, "module-quapy.classification.calibration"], [9, "module-quapy.classification.methods"], [9, "module-quapy.classification.neural"], [9, "module-quapy.classification.svmperf"], [10, "module-quapy.data"], [10, "module-quapy.data.base"], [10, "module-quapy.data.datasets"], [10, "module-quapy.data.preprocessing"], [10, "module-quapy.data.reader"], [11, "module-quapy.method"], [11, "module-quapy.method.aggregative"], [11, "module-quapy.method.base"], [11, "module-quapy.method.meta"], [11, "module-quapy.method.neural"], [11, "module-quapy.method.non_aggregative"]], "mrae() (in module quapy.error)": [[8, "quapy.error.mrae"]], "mse() (in module quapy.error)": [[8, "quapy.error.mse"]], "nkld() (in module quapy.error)": [[8, "quapy.error.nkld"]], "normalize_prevalence() (in module quapy.functional)": [[8, "quapy.functional.normalize_prevalence"]], "num_prevalence_combinations() (in module quapy.functional)": [[8, "quapy.functional.num_prevalence_combinations"]], "on_preclassified_instances() (quapy.protocol.onlabelledcollectionprotocol method)": [[8, "quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances"]], "parallel() (in module quapy.util)": [[8, "quapy.util.parallel"]], "pickled_resource() (in module quapy.util)": [[8, "quapy.util.pickled_resource"]], "prediction() (in module quapy.evaluation)": [[8, "quapy.evaluation.prediction"]], "prevalence_from_labels() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_labels"]], "prevalence_from_probabilities() (in module quapy.functional)": [[8, "quapy.functional.prevalence_from_probabilities"]], "prevalence_grid() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.prevalence_grid"]], "prevalence_linspace() (in module quapy.functional)": [[8, "quapy.functional.prevalence_linspace"]], "quantify() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.quantify"]], "quapy": [[8, "module-quapy"]], "quapy.error": [[8, "module-quapy.error"]], "quapy.evaluation": [[8, "module-quapy.evaluation"]], "quapy.functional": [[8, "module-quapy.functional"]], "quapy.model_selection": [[8, "module-quapy.model_selection"]], "quapy.plot": [[8, "module-quapy.plot"]], "quapy.protocol": [[8, "module-quapy.protocol"]], "quapy.util": [[8, "module-quapy.util"]], "rae() (in module quapy.error)": [[8, "quapy.error.rae"]], "random_state (quapy.protocol.abstractstochasticseededprotocol property)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.random_state"]], "relative_absolute_error() (in module quapy.error)": [[8, "quapy.error.relative_absolute_error"]], "sample() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.sample"]], "sample() 
(quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.sample"]], "sample() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.sample"]], "sample() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.sample"]], "sample() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.sample"]], "samples_parameters() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.samples_parameters"]], "samples_parameters() (quapy.protocol.abstractstochasticseededprotocol method)": [[8, "quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters"]], "samples_parameters() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.samples_parameters"]], "samples_parameters() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.samples_parameters"]], "samples_parameters() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.samples_parameters"]], "save_text_file() (in module quapy.util)": [[8, "quapy.util.save_text_file"]], "se() (in module quapy.error)": [[8, "quapy.error.se"]], "set_params() (quapy.model_selection.gridsearchq method)": [[8, "quapy.model_selection.GridSearchQ.set_params"]], "smooth() (in module quapy.error)": [[8, "quapy.error.smooth"]], "strprev() (in module quapy.functional)": [[8, "quapy.functional.strprev"]], "temp_seed() (in module quapy.util)": [[8, "quapy.util.temp_seed"]], "total() (quapy.protocol.app method)": [[8, "quapy.protocol.APP.total"]], "total() (quapy.protocol.abstractprotocol method)": [[8, "quapy.protocol.AbstractProtocol.total"]], "total() (quapy.protocol.domainmixer method)": [[8, "quapy.protocol.DomainMixer.total"]], "total() (quapy.protocol.npp method)": [[8, "quapy.protocol.NPP.total"]], "total() (quapy.protocol.usimplexpp method)": [[8, "quapy.protocol.USimplexPP.total"]], "uniform_prevalence_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_prevalence_sampling"]], "uniform_simplex_sampling() (in module quapy.functional)": [[8, "quapy.functional.uniform_simplex_sampling"]], "bctscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.BCTSCalibration"]], "cnnnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.CNNnet"]], "lstmnet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.LSTMnet"]], "lowranklogisticregression (class in quapy.classification.methods)": [[9, "quapy.classification.methods.LowRankLogisticRegression"]], "nbvscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.NBVSCalibration"]], "neuralclassifiertrainer (class in quapy.classification.neural)": [[9, "quapy.classification.neural.NeuralClassifierTrainer"]], "recalibratedprobabilisticclassifier (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifier"]], "recalibratedprobabilisticclassifierbase (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase"]], "svmperf (class in quapy.classification.svmperf)": [[9, "quapy.classification.svmperf.SVMperf"]], "tscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.TSCalibration"]], "textclassifiernet (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TextClassifierNet"]], "torchdataset (class in quapy.classification.neural)": [[9, "quapy.classification.neural.TorchDataset"]], 
"vscalibration (class in quapy.classification.calibration)": [[9, "quapy.classification.calibration.VSCalibration"]], "asdataloader() (quapy.classification.neural.torchdataset method)": [[9, "quapy.classification.neural.TorchDataset.asDataloader"]], "classes_ (quapy.classification.calibration.recalibratedprobabilisticclassifierbase property)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.classes_"]], "decision_function() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.decision_function"]], "device (quapy.classification.neural.neuralclassifiertrainer property)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.device"]], "dimensions() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.dimensions"]], "document_embedding() (quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.document_embedding"]], "document_embedding() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.document_embedding"]], "document_embedding() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.document_embedding"]], "fit() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit"]], "fit() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.fit"]], "fit() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.fit"]], "fit() (quapy.classification.svmperf.svmperf method)": [[9, "quapy.classification.svmperf.SVMperf.fit"]], "fit_cv() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv"]], "fit_tr_val() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val"]], "forward() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.forward"]], "get_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.get_params"]], "get_params() (quapy.classification.neural.cnnnet method)": [[9, "quapy.classification.neural.CNNnet.get_params"]], "get_params() (quapy.classification.neural.lstmnet method)": [[9, "quapy.classification.neural.LSTMnet.get_params"]], "get_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.get_params"]], "get_params() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.get_params"]], "predict() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict"]], "predict() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict"]], "predict() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict"]], "predict() (quapy.classification.svmperf.svmperf 
method)": [[9, "quapy.classification.svmperf.SVMperf.predict"]], "predict_proba() (quapy.classification.calibration.recalibratedprobabilisticclassifierbase method)": [[9, "quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba"]], "predict_proba() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.predict_proba"]], "predict_proba() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.predict_proba"]], "predict_proba() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.predict_proba"]], "quapy.classification": [[9, "module-quapy.classification"]], "quapy.classification.calibration": [[9, "module-quapy.classification.calibration"]], "quapy.classification.methods": [[9, "module-quapy.classification.methods"]], "quapy.classification.neural": [[9, "module-quapy.classification.neural"]], "quapy.classification.svmperf": [[9, "module-quapy.classification.svmperf"]], "reset_net_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.reset_net_params"]], "set_params() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.set_params"]], "set_params() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.set_params"]], "training (quapy.classification.neural.cnnnet attribute)": [[9, "quapy.classification.neural.CNNnet.training"]], "training (quapy.classification.neural.lstmnet attribute)": [[9, "quapy.classification.neural.LSTMnet.training"]], "training (quapy.classification.neural.textclassifiernet attribute)": [[9, "quapy.classification.neural.TextClassifierNet.training"]], "transform() (quapy.classification.methods.lowranklogisticregression method)": [[9, "quapy.classification.methods.LowRankLogisticRegression.transform"]], "transform() (quapy.classification.neural.neuralclassifiertrainer method)": [[9, "quapy.classification.neural.NeuralClassifierTrainer.transform"]], "valid_losses (quapy.classification.svmperf.svmperf attribute)": [[9, "quapy.classification.svmperf.SVMperf.valid_losses"]], "vocabulary_size (quapy.classification.neural.cnnnet property)": [[9, "quapy.classification.neural.CNNnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.lstmnet property)": [[9, "quapy.classification.neural.LSTMnet.vocabulary_size"]], "vocabulary_size (quapy.classification.neural.textclassifiernet property)": [[9, "quapy.classification.neural.TextClassifierNet.vocabulary_size"]], "xavier_uniform() (quapy.classification.neural.textclassifiernet method)": [[9, "quapy.classification.neural.TextClassifierNet.xavier_uniform"]], "dataset (class in quapy.data.base)": [[10, "quapy.data.base.Dataset"]], "indextransformer (class in quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.IndexTransformer"]], "labelledcollection (class in quapy.data.base)": [[10, "quapy.data.base.LabelledCollection"]], "splitstratified() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.SplitStratified"]], "x (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.X"]], "xp (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.Xp"]], "xy (quapy.data.base.labelledcollection property)": 
[[10, "quapy.data.base.LabelledCollection.Xy"]], "add_word() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.add_word"]], "binarize() (in module quapy.data.reader)": [[10, "quapy.data.reader.binarize"]], "binary (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.binary"]], "binary (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.binary"]], "classes_ (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.classes_"]], "counts() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.counts"]], "fetch_ucidataset() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCIDataset"]], "fetch_ucilabelledcollection() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_UCILabelledCollection"]], "fetch_lequa2022() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_lequa2022"]], "fetch_reviews() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_reviews"]], "fetch_twitter() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.fetch_twitter"]], "fit() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit"]], "fit_transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.fit_transform"]], "from_csv() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_csv"]], "from_sparse() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_sparse"]], "from_text() (in module quapy.data.reader)": [[10, "quapy.data.reader.from_text"]], "index() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.index"]], "kfcv() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.kFCV"]], "kfcv() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.kFCV"]], "load() (quapy.data.base.dataset class method)": [[10, "quapy.data.base.Dataset.load"]], "load() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.load"]], "mix() (quapy.data.base.labelledcollection class method)": [[10, "quapy.data.base.LabelledCollection.mix"]], "n_classes (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.n_classes"]], "n_classes (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.n_classes"]], "p (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.p"]], "prevalence() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.prevalence"]], "quapy.data": [[10, "module-quapy.data"]], "quapy.data.base": [[10, "module-quapy.data.base"]], "quapy.data.datasets": [[10, "module-quapy.data.datasets"]], "quapy.data.preprocessing": [[10, "module-quapy.data.preprocessing"]], "quapy.data.reader": [[10, "module-quapy.data.reader"]], "reduce_columns() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.reduce_columns"]], "reindex_labels() (in module quapy.data.reader)": [[10, "quapy.data.reader.reindex_labels"]], "sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling"]], "sampling_from_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.sampling_from_index"]], "sampling_index() (quapy.data.base.labelledcollection method)": [[10, 
"quapy.data.base.LabelledCollection.sampling_index"]], "split_random() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_random"]], "split_stratified() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.split_stratified"]], "standardize() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.standardize"]], "stats() (quapy.data.base.dataset method)": [[10, "quapy.data.base.Dataset.stats"]], "stats() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.stats"]], "text2tfidf() (in module quapy.data.preprocessing)": [[10, "quapy.data.preprocessing.text2tfidf"]], "train_test (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.train_test"]], "transform() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.transform"]], "uniform_sampling() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling"]], "uniform_sampling_index() (quapy.data.base.labelledcollection method)": [[10, "quapy.data.base.LabelledCollection.uniform_sampling_index"]], "vocabulary_size (quapy.data.base.dataset property)": [[10, "quapy.data.base.Dataset.vocabulary_size"]], "vocabulary_size() (quapy.data.preprocessing.indextransformer method)": [[10, "quapy.data.preprocessing.IndexTransformer.vocabulary_size"]], "warn() (in module quapy.data.datasets)": [[10, "quapy.data.datasets.warn"]], "y (quapy.data.base.labelledcollection property)": [[10, "quapy.data.base.LabelledCollection.y"]], "acc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ACC"]], "adjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.AdjustedClassifyAndCount"]], "aggregativeprobabilisticquantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier"]], "aggregativequantifier (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.AggregativeQuantifier"]], "basequantifier (class in quapy.method.base)": [[11, "quapy.method.base.BaseQuantifier"]], "binaryquantifier (class in quapy.method.base)": [[11, "quapy.method.base.BinaryQuantifier"]], "cc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.CC"]], "classifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ClassifyAndCount"]], "distributionmatching (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DistributionMatching"]], "dys (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.DyS"]], "eacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EACC"]], "ecc() (in module quapy.method.meta)": [[11, "quapy.method.meta.ECC"]], "eemq() (in module quapy.method.meta)": [[11, "quapy.method.meta.EEMQ"]], "ehdy() (in module quapy.method.meta)": [[11, "quapy.method.meta.EHDy"]], "em() (quapy.method.aggregative.emq class method)": [[11, "quapy.method.aggregative.EMQ.EM"]], "emq (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.EMQ"]], "epacc() (in module quapy.method.meta)": [[11, "quapy.method.meta.EPACC"]], "epsilon (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.EPSILON"]], "ensemble (class in quapy.method.meta)": [[11, "quapy.method.meta.Ensemble"]], "expectationmaximizationquantifier (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ExpectationMaximizationQuantifier"]], "hdy (class in 
quapy.method.aggregative)": [[11, "quapy.method.aggregative.HDy"]], "hellingerdistancey (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.HellingerDistanceY"]], "max (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MAX"]], "max_iter (quapy.method.aggregative.emq attribute)": [[11, "quapy.method.aggregative.EMQ.MAX_ITER"]], "ms (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS"]], "ms2 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.MS2"]], "maximumlikelihoodprevalenceestimation (class in quapy.method.non_aggregative)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation"]], "mediansweep (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep"]], "mediansweep2 (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.MedianSweep2"]], "onevsall (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAll"]], "onevsallaggregative (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.OneVsAllAggregative"]], "onevsallgeneric (class in quapy.method.base)": [[11, "quapy.method.base.OneVsAllGeneric"]], "pacc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PACC"]], "pcc (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.PCC"]], "probabilisticadjustedclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticAdjustedClassifyAndCount"]], "probabilisticclassifyandcount (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.ProbabilisticClassifyAndCount"]], "quanetmodule (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetModule"]], "quanettrainer (class in quapy.method.neural)": [[11, "quapy.method.neural.QuaNetTrainer"]], "sld (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.SLD"]], "smm (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.SMM"]], "t50 (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.T50"]], "thresholdoptimization (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.ThresholdOptimization"]], "valid_policies (quapy.method.meta.ensemble attribute)": [[11, "quapy.method.meta.Ensemble.VALID_POLICIES"]], "x (class in quapy.method.aggregative)": [[11, "quapy.method.aggregative.X"]], "aggregate() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.aggregate"]], "aggregate() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.aggregate"]], "aggregate() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.aggregate"]], "aggregate() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.aggregate"]], "aggregate() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.aggregate"]], "aggregate() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.aggregate"]], "aggregate() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.aggregate"]], "aggregate() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.aggregate"]], "aggregate() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.aggregate"]], "aggregate() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.aggregate"]], "aggregate() (quapy.method.aggregative.smm 
method)": [[11, "quapy.method.aggregative.SMM.aggregate"]], "aggregate() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.aggregate"]], "aggregative (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.aggregative"]], "classes_ (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classes_"]], "classes_ (quapy.method.base.onevsallgeneric property)": [[11, "quapy.method.base.OneVsAllGeneric.classes_"]], "classes_ (quapy.method.neural.quanettrainer property)": [[11, "quapy.method.neural.QuaNetTrainer.classes_"]], "classifier (quapy.method.aggregative.aggregativequantifier property)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classifier"]], "classify() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.classify"]], "classify() (quapy.method.aggregative.aggregativeprobabilisticquantifier method)": [[11, "quapy.method.aggregative.AggregativeProbabilisticQuantifier.classify"]], "classify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.classify"]], "classify() (quapy.method.aggregative.onevsallaggregative method)": [[11, "quapy.method.aggregative.OneVsAllAggregative.classify"]], "classify() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.classify"]], "clean_checkpoint() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint"]], "clean_checkpoint_dir() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.clean_checkpoint_dir"]], "cross_generate_predictions() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions"]], "cross_generate_predictions_depr() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.cross_generate_predictions_depr"]], "device (quapy.method.neural.quanetmodule property)": [[11, "quapy.method.neural.QuaNetModule.device"]], "ensemblefactory() (in module quapy.method.meta)": [[11, "quapy.method.meta.ensembleFactory"]], "fit() (quapy.method.aggregative.acc method)": [[11, "quapy.method.aggregative.ACC.fit"]], "fit() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.fit"]], "fit() (quapy.method.aggregative.cc method)": [[11, "quapy.method.aggregative.CC.fit"]], "fit() (quapy.method.aggregative.distributionmatching method)": [[11, "quapy.method.aggregative.DistributionMatching.fit"]], "fit() (quapy.method.aggregative.dys method)": [[11, "quapy.method.aggregative.DyS.fit"]], "fit() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.fit"]], "fit() (quapy.method.aggregative.hdy method)": [[11, "quapy.method.aggregative.HDy.fit"]], "fit() (quapy.method.aggregative.pacc method)": [[11, "quapy.method.aggregative.PACC.fit"]], "fit() (quapy.method.aggregative.pcc method)": [[11, "quapy.method.aggregative.PCC.fit"]], "fit() (quapy.method.aggregative.smm method)": [[11, "quapy.method.aggregative.SMM.fit"]], "fit() (quapy.method.aggregative.thresholdoptimization method)": [[11, "quapy.method.aggregative.ThresholdOptimization.fit"]], "fit() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.fit"]], "fit() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.fit"]], "fit() (quapy.method.meta.ensemble method)": [[11, 
"quapy.method.meta.Ensemble.fit"]], "fit() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.fit"]], "fit() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit"]], "forward() (quapy.method.neural.quanetmodule method)": [[11, "quapy.method.neural.QuaNetModule.forward"]], "getptecondestim() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.getPteCondEstim"]], "getptecondestim() (quapy.method.aggregative.pacc class method)": [[11, "quapy.method.aggregative.PACC.getPteCondEstim"]], "get_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.get_params"]], "get_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.get_params"]], "get_probability_distribution() (in module quapy.method.meta)": [[11, "quapy.method.meta.get_probability_distribution"]], "mae_loss() (in module quapy.method.neural)": [[11, "quapy.method.neural.mae_loss"]], "newelm() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.newELM"]], "newonevsall() (in module quapy.method.base)": [[11, "quapy.method.base.newOneVsAll"]], "newsvmae() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.newSVMAE"]], "newsvmkld() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.newSVMKLD"]], "newsvmq() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.newSVMQ"]], "newsvmrae() (in module quapy.method.aggregative)": [[11, "quapy.method.aggregative.newSVMRAE"]], "predict_proba() (quapy.method.aggregative.emq method)": [[11, "quapy.method.aggregative.EMQ.predict_proba"]], "probabilistic (quapy.method.meta.ensemble property)": [[11, "quapy.method.meta.Ensemble.probabilistic"]], "quantify() (quapy.method.aggregative.aggregativequantifier method)": [[11, "quapy.method.aggregative.AggregativeQuantifier.quantify"]], "quantify() (quapy.method.base.basequantifier method)": [[11, "quapy.method.base.BaseQuantifier.quantify"]], "quantify() (quapy.method.base.onevsallgeneric method)": [[11, "quapy.method.base.OneVsAllGeneric.quantify"]], "quantify() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.quantify"]], "quantify() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.quantify"]], "quantify() (quapy.method.non_aggregative.maximumlikelihoodprevalenceestimation method)": [[11, "quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify"]], "quapy.method": [[11, "module-quapy.method"]], "quapy.method.aggregative": [[11, "module-quapy.method.aggregative"]], "quapy.method.base": [[11, "module-quapy.method.base"]], "quapy.method.meta": [[11, "module-quapy.method.meta"]], "quapy.method.neural": [[11, "module-quapy.method.neural"]], "quapy.method.non_aggregative": [[11, "module-quapy.method.non_aggregative"]], "set_params() (quapy.method.meta.ensemble method)": [[11, "quapy.method.meta.Ensemble.set_params"]], "set_params() (quapy.method.neural.quanettrainer method)": [[11, "quapy.method.neural.QuaNetTrainer.set_params"]], "solve_adjustment() (quapy.method.aggregative.acc class method)": [[11, "quapy.method.aggregative.ACC.solve_adjustment"]], "training (quapy.method.neural.quanetmodule attribute)": [[11, "quapy.method.neural.QuaNetModule.training"]]}}) \ No newline at end of file diff --git a/examples/quanet_example.py b/examples/quanet_example.py new file mode 100644 index 
0000000..4be3132 --- /dev/null +++ b/examples/quanet_example.py @@ -0,0 +1,35 @@ +import quapy as qp +from quapy.classification.neural import CNNnet +from quapy.classification.neural import NeuralClassifierTrainer +from quapy.method.meta import QuaNet +import quapy.functional as F + +""" +This example shows how to train QuaNet. The internal classifier is a word-based CNN. +""" + +# set the sample size in the environment +qp.environ["SAMPLE_SIZE"] = 100 + +# the dataset is textual (Kindle reviews from Amazon), so we need to index terms, i.e., +# we need to convert distinct terms into numerical ids +dataset = qp.datasets.fetch_reviews('kindle', pickle=True) +qp.data.preprocessing.index(dataset, min_df=5, inplace=True) +train, test = dataset.train_test + +# train the text classifier: +cnn_module = CNNnet(dataset.vocabulary_size, dataset.training.n_classes) +cnn_classifier = NeuralClassifierTrainer(cnn_module, device='cuda') +cnn_classifier.fit(*dataset.training.Xy) + +# train QuaNet (alternatively, we can set fit_classifier=True and let QuaNet train the classifier) +quantifier = QuaNet(cnn_classifier, device='cuda') +quantifier.fit(train, fit_classifier=False) + +# prediction and evaluation +estim_prevalence = quantifier.quantify(test.instances) +mae = qp.error.mae(test.prevalence(), estim_prevalence) + +print(f'true prevalence: {F.strprev(test.prevalence())}') +print(f'estim prevalence: {F.strprev(estim_prevalence)}') +print(f'MAE = {mae:.4f}') \ No newline at end of file diff --git a/quapy/classification/neural.py b/quapy/classification/neural.py index 0d576c5..dc8de5b 100644 --- a/quapy/classification/neural.py +++ b/quapy/classification/neural.py @@ -229,11 +229,11 @@ class NeuralClassifierTrainer: self.net.eval() opt = self.trainer_hyperparams with torch.no_grad(): - positive_probs = [] + posteriors = [] for xi in TorchDataset(instances).asDataloader( opt['batch_size_test'], shuffle=False, pad_length=opt['padding_length'], device=opt['device']): - positive_probs.append(self.net.predict_proba(xi)) - return np.concatenate(positive_probs) + posteriors.append(self.net.predict_proba(xi)) + return np.concatenate(posteriors) def transform(self, instances): """ diff --git a/quapy/data/base.py b/quapy/data/base.py index 7093821..ef3b7f2 100644 --- a/quapy/data/base.py +++ b/quapy/data/base.py @@ -523,3 +523,14 @@ class Dataset: yield Dataset(train, test, name=f'fold {(i % nfolds) + 1}/{nfolds} (round={(i // nfolds) + 1})') + def reduce(self, n_train=100, n_test=100): + """ + Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set. 
+ + :param n_train: number of training documents to keep (default 100) + :param n_test: number of test documents to keep (default 100) + :return: self + """ + self.training = self.training.sampling(n_train, *self.training.prevalence()) + self.test = self.test.sampling(n_test, *self.test.prevalence()) + return self \ No newline at end of file diff --git a/quapy/data/preprocessing.py b/quapy/data/preprocessing.py index e65ccf7..9aa8f8b 100644 --- a/quapy/data/preprocessing.py +++ b/quapy/data/preprocessing.py @@ -121,6 +121,9 @@ def index(dataset: Dataset, min_df=5, inplace=False, **kwargs): training_index = indexer.fit_transform(dataset.training.instances) test_index = indexer.transform(dataset.test.instances) + training_index = np.asarray(training_index, dtype=object) + test_index = np.asarray(test_index, dtype=object) + if inplace: dataset.training = LabelledCollection(training_index, dataset.training.labels, dataset.classes_) dataset.test = LabelledCollection(test_index, dataset.test.labels, dataset.classes_) @@ -181,12 +184,12 @@ class IndexTransformer: # given the number of tasks and the number of jobs, generates the slices for the parallel processes assert self.unk != -1, 'transform called before fit' n_jobs = qp._get_njobs(n_jobs) - indexed = map_parallel(func=self._index, args=X, n_jobs=n_jobs) - return np.asarray(indexed) + return map_parallel(func=self._index, args=X, n_jobs=n_jobs) + def _index(self, documents): vocab = self.vocabulary_.copy() - return [[vocab.prevalence(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')] + return [[vocab.get(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')] def fit_transform(self, X, n_jobs=None): """ diff --git a/quapy/evaluation.py b/quapy/evaluation.py index 95193aa..4f5de10 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -2,7 +2,7 @@ from typing import Union, Callable, Iterable import numpy as np from tqdm import tqdm import quapy as qp -from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol +from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol, IterateProtocol from quapy.method.base import BaseQuantifier import pandas as pd @@ -94,5 +94,15 @@ def evaluate( return error_metric(true_prevs, estim_prevs) +def evaluate_on_samples( + model: BaseQuantifier, + samples: [qp.data.LabelledCollection], + error_metric:Union[str, Callable], + verbose=False): + + return evaluate(model, IterateProtocol(samples), error_metric, aggr_speedup=False, verbose=verbose) + + + diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index b872ba3..bb63c64 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -338,7 +338,7 @@ class ACC(AggregativeQuantifier): ) self.cc = CC(self.classifier) - self.Pte_cond_estim_ = self.getPteCondEstim(data.classes_, y, y_) + self.Pte_cond_estim_ = self.getPteCondEstim(self.classifier.classes_, y, y_) return self @@ -996,7 +996,7 @@ def newSVMAE(svmperf_base=None, C=1): """ return newELM(svmperf_base, loss='mae', C=C) -def newSVMAE(svmperf_base=None, C=1): +def newSVMRAE(svmperf_base=None, C=1): """ SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first used by `Moreo and Sebastiani, 2021 `_. 
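To make the intent of the two helpers introduced above concrete, namely the in-place Dataset.reduce() and the evaluate_on_samples() shortcut, the following is a minimal usage sketch. It is not part of the patch; the dataset choice, sizes, and prevalence values are illustrative only:

import quapy as qp
from sklearn.linear_model import LogisticRegression
from quapy.method.aggregative import PACC

# shrink both the training and the test set in place for a quick experiment;
# reduce() subsamples each set while preserving its prevalence
dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5, pickle=True).reduce(n_train=500, n_test=500)

quantifier = PACC(LogisticRegression()).fit(dataset.training)

# draw a few prevalence-controlled test samples and score the quantifier on them;
# evaluate_on_samples() wraps the list in an IterateProtocol and defers to evaluate()
samples = [dataset.test.sampling(100, *prev) for prev in [[0.2, 0.8], [0.5, 0.5], [0.8, 0.2]]]
mae = qp.evaluation.evaluate_on_samples(quantifier, samples, error_metric='mae')
print(f'MAE = {mae:.4f}')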
diff --git a/quapy/method/meta.py b/quapy/method/meta.py index ba682ee..6db6861 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -7,6 +7,7 @@ from sklearn.model_selection import GridSearchCV, cross_val_predict from tqdm import tqdm import quapy as qp +from evaluation import evaluate_on_samples from quapy import functional as F from quapy.data import LabelledCollection from quapy.model_selection import GridSearchQ @@ -182,7 +183,7 @@ class Ensemble(BaseQuantifier): tests = [m[3] for m in self.ensemble] scores = [] for i, model in enumerate(self.ensemble): - scores.append(evaluate(model[0], tests[:i] + tests[i + 1:], error, self.n_jobs)) + scores.append(evaluate_on_samples(model[0], tests[:i] + tests[i + 1:], error)) order = np.argsort(scores) self.ensemble = _select_k(self.ensemble, order, k=self.red_size) diff --git a/quapy/method/neural.py b/quapy/method/neural.py index 1871ff0..e348930 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -6,6 +6,7 @@ import torch from torch.nn import MSELoss from torch.nn.functional import relu +from protocol import USimplexPP from quapy.method.aggregative import * from quapy.util import EarlyStop @@ -41,7 +42,8 @@ class QuaNetTrainer(BaseQuantifier): :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data), `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and `transform` (i.e., that can generate embedded representations of the unlabelled instances). - :param sample_size: integer, the sample size + :param sample_size: integer, the sample size; default is None, meaning that the sample size should be + taken from qp.environ["SAMPLE_SIZE"] :param n_epochs: integer, maximum number of training epochs :param tr_iter_per_poch: integer, number of training iterations before considering an epoch complete :param va_iter_per_poch: integer, number of validation iterations to perform after each epoch @@ -61,7 +63,7 @@ class QuaNetTrainer(BaseQuantifier): def __init__(self, classifier, - sample_size, + sample_size=None, n_epochs=100, tr_iter_per_poch=500, va_iter_per_poch=100, @@ -83,7 +85,7 @@ class QuaNetTrainer(BaseQuantifier): f'the classifier {classifier.__class__.__name__} does not seem to be able to produce posterior probabilities ' \ f'since it does not implement the method "predict_proba"' self.classifier = classifier - self.sample_size = sample_size + self.sample_size = qp._get_sample_size(sample_size) self.n_epochs = n_epochs self.tr_iter = tr_iter_per_poch self.va_iter = va_iter_per_poch @@ -216,16 +218,13 @@ class QuaNetTrainer(BaseQuantifier): self.quanet.train(mode=train) losses = [] mae_errors = [] - if train==False: - prevpoints = F.get_nprevpoints_approximation(iterations, self.quanet.n_classes) - iterations = F.num_prevalence_combinations(prevpoints, self.quanet.n_classes) - with qp.util.temp_seed(0): - sampling_index_gen = data.artificial_sampling_index_generator(self.sample_size, prevpoints) - else: - sampling_index_gen = [data.sampling_index(self.sample_size, *prev) for prev in - F.uniform_simplex_sampling(data.n_classes, iterations)] - pbar = tqdm(sampling_index_gen, total=iterations) if train else sampling_index_gen - + sampler = USimplexPP( + data, + sample_size=self.sample_size, + repeats=iterations, + random_state=None if train else 0 # different samples during train, same samples during validation + ) + pbar = tqdm(sampler.samples_parameters(), total=sampler.total()) for it, index in enumerate(pbar): sample_data = 
data.sampling_from_index(index) sample_posteriors = posteriors[index] diff --git a/quapy/protocol.py b/quapy/protocol.py index 70f4a48..60df09c 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -34,6 +34,34 @@ class AbstractProtocol(metaclass=ABCMeta): return None +class IterateProtocol(AbstractProtocol): + """ + A very simple protocol that iterates over a list of previously generated samples + + :param samples: a list of :class:`quapy.data.base.LabelledCollection` + """ + def __init__(self, samples: [LabelledCollection]): + self.samples = samples + + def __call__(self): + """ + Yields one sample from the initial list at a time + + :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances + and `prev` is an `np.ndarray` with the class prevalence values + """ + for sample in self.samples: + yield sample.Xp + + def total(self): + """ + Returns the number of samples in this protocol + + :return: int + """ + return len(self.samples) + + class AbstractStochasticSeededProtocol(AbstractProtocol): """ An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g., @@ -107,7 +135,7 @@ class OnLabelledCollectionProtocol: Protocols that generate samples from a :class:`qp.data.LabelledCollection` object. """ - RETURN_TYPES = ['sample_prev', 'labelled_collection'] + RETURN_TYPES = ['sample_prev', 'labelled_collection', 'index'] def get_labelled_collection(self): """ diff --git a/quapy/tests/test_evaluation.py b/quapy/tests/test_evaluation.py index 9a77867..db1ddc6 100644 --- a/quapy/tests/test_evaluation.py +++ b/quapy/tests/test_evaluation.py @@ -39,10 +39,6 @@ class EvalTestCase(unittest.TestCase): self.emq.fit(data) return self - def set_params(self, **parameters): pass - def get_params(self, deep=True): pass - - emq = NonAggregativeEMQ(SlowLR()).fit(train) tinit = time() diff --git a/quapy/tests/test_hierarchy.py b/quapy/tests/test_hierarchy.py index 21af4b6..2ea3af5 100644 --- a/quapy/tests/test_hierarchy.py +++ b/quapy/tests/test_hierarchy.py @@ -27,6 +27,5 @@ class HierarchyTestCase(unittest.TestCase): self.assertEqual(isinstance(m, AggregativeProbabilisticQuantifier), True) - if __name__ == '__main__': unittest.main() diff --git a/quapy/tests/test_labelcollection.py b/quapy/tests/test_labelcollection.py new file mode 100644 index 0000000..845f763 --- /dev/null +++ b/quapy/tests/test_labelcollection.py @@ -0,0 +1,21 @@ +import unittest +import numpy as np +import quapy as qp + + +class LabelCollectionTestCase(unittest.TestCase): + def test_split(self): + x = np.arange(100) + y = np.random.randint(0,5,100) + data = qp.data.LabelledCollection(x,y) + tr, te = data.split_random(0.7) + check_prev = tr.prevalence()*0.7 + te.prevalence()*0.3 + + self.assertEqual(len(tr), 70) + self.assertEqual(len(te), 30) + self.assertEqual(np.allclose(check_prev, data.prevalence()), True) + self.assertEqual(len(tr+te), len(data)) + + +if __name__ == '__main__': + unittest.main() diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py index f13907c..4da5617 100644 --- a/quapy/tests/test_methods.py +++ b/quapy/tests/test_methods.py @@ -6,18 +6,21 @@ from sklearn.svm import LinearSVC import quapy as qp from quapy.method.base import BinaryQuantifier from quapy.data import Dataset, LabelledCollection -from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS, EXPLICIT_LOSS_MINIMIZATION_METHODS +from quapy.method import AGGREGATIVE_METHODS, NON_AGGREGATIVE_METHODS from quapy.method.aggregative import ACC, 
PACC, HDy from quapy.method.meta import Ensemble -datasets = [pytest.param(qp.datasets.fetch_twitter('hcr'), id='hcr'), +datasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True), id='hcr'), pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')] +tinydatasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True).reduce(), id='tiny_hcr'), + pytest.param(qp.datasets.fetch_UCIDataset('ionosphere').reduce(), id='tiny_ionosphere')] + learners = [LogisticRegression, LinearSVC] @pytest.mark.parametrize('dataset', datasets) -@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS)) +@pytest.mark.parametrize('aggregative_method', AGGREGATIVE_METHODS) @pytest.mark.parametrize('learner', learners) def test_aggregative_methods(dataset: Dataset, aggregative_method, learner): model = aggregative_method(learner()) @@ -36,30 +39,6 @@ def test_aggregative_methods(dataset: Dataset, aggregative_method, learner): assert type(error) == numpy.float64 -@pytest.mark.parametrize('dataset', datasets) -@pytest.mark.parametrize('elm_method', EXPLICIT_LOSS_MINIMIZATION_METHODS) -def test_elm_methods(dataset: Dataset, elm_method): - try: - model = elm_method() - except AssertionError as ae: - if ae.args[0].find('does not seem to point to a valid path') > 0: - print('Missing SVMperf binary program, skipping test') - return - - if isinstance(model, BinaryQuantifier) and not dataset.binary: - print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') - return - - model.fit(dataset.training) - - estim_prevalences = model.quantify(dataset.test.instances) - - true_prevalences = dataset.test.prevalence() - error = qp.error.mae(true_prevalences, estim_prevalences) - - assert type(error) == numpy.float64 - - @pytest.mark.parametrize('dataset', datasets) @pytest.mark.parametrize('non_aggregative_method', NON_AGGREGATIVE_METHODS) def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): @@ -79,16 +58,20 @@ def test_non_aggregative_methods(dataset: Dataset, non_aggregative_method): assert type(error) == numpy.float64 -@pytest.mark.parametrize('base_method', AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS)) -@pytest.mark.parametrize('learner', learners) -@pytest.mark.parametrize('dataset', datasets) +@pytest.mark.parametrize('base_method', AGGREGATIVE_METHODS) +@pytest.mark.parametrize('learner', [LogisticRegression]) +@pytest.mark.parametrize('dataset', tinydatasets) @pytest.mark.parametrize('policy', Ensemble.VALID_POLICIES) def test_ensemble_method(base_method, learner, dataset: Dataset, policy): - qp.environ['SAMPLE_SIZE'] = len(dataset.training) - model = Ensemble(quantifier=base_method(learner()), size=5, policy=policy, n_jobs=-1) - if isinstance(model, BinaryQuantifier) and not dataset.binary: - print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') + qp.environ['SAMPLE_SIZE'] = 20 + base_quantifier=base_method(learner()) + if isinstance(base_quantifier, BinaryQuantifier) and not dataset.binary: + print(f'skipping the test of binary model {base_quantifier} on non-binary dataset {dataset}') return + if not dataset.binary and policy=='ds': + print(f'skipping the test of binary policy ds on non-binary dataset {dataset}') + return + model = Ensemble(quantifier=base_quantifier, size=5, policy=policy, n_jobs=-1) model.fit(dataset.training) @@ -107,19 +90,23 @@ def test_quanet_method(): print('skipping QuaNet test due to missing torch package') 
return + + qp.environ['SAMPLE_SIZE'] = 100 + + # load the kindle dataset as text, and convert words to numerical indexes dataset = qp.datasets.fetch_reviews('kindle', pickle=True) - dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()), - dataset.test.sampling(100, *dataset.test.prevalence())) + dataset = Dataset(dataset.training.sampling(200, *dataset.training.prevalence()), + dataset.test.sampling(200, *dataset.test.prevalence())) qp.data.preprocessing.index(dataset, min_df=5, inplace=True) from quapy.classification.neural import CNNnet - cnn = CNNnet(dataset.vocabulary_size, dataset.training.n_classes) + cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes) from quapy.classification.neural import NeuralClassifierTrainer learner = NeuralClassifierTrainer(cnn, device='cuda') from quapy.method.meta import QuaNet - model = QuaNet(learner, sample_size=len(dataset.training), device='cuda') + model = QuaNet(learner, device='cuda') if isinstance(model, BinaryQuantifier) and not dataset.binary: print(f'skipping the test of binary model {model} on non-binary dataset {dataset}') @@ -135,26 +122,12 @@ def test_quanet_method(): assert type(error) == numpy.float64 -def models_to_test_for_str_label_names(): - models = list() - learner = LogisticRegression - for method in AGGREGATIVE_METHODS.difference(EXPLICIT_LOSS_MINIMIZATION_METHODS): - models.append(method(learner(random_state=0))) - for method in NON_AGGREGATIVE_METHODS: - models.append(method()) - return models - - -@pytest.mark.parametrize('model', models_to_test_for_str_label_names()) -def test_str_label_names(model): - if type(model) in {ACC, PACC, HDy}: - print( - f'skipping the test of binary model {type(model)} because it currently does not support random seed control.') - return +def test_str_label_names(): + model = qp.method.aggregative.CC(LogisticRegression()) dataset = qp.datasets.fetch_reviews('imdb', pickle=True) dataset = Dataset(dataset.training.sampling(1000, *dataset.training.prevalence()), - dataset.test.sampling(1000, *dataset.test.prevalence())) + dataset.test.sampling(1000, 0.25, 0.75)) qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) numpy.random.seed(0) From 25a829996e0fe141137c1cdb1597d5628c01ea68 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 14 Feb 2023 11:14:38 +0100 Subject: [PATCH 57/59] evaluation updated --- examples/explicit_loss_minimization.py | 6 +- examples/one_vs_all.py | 8 +-- quapy/CHANGE_LOG.txt | 2 +- quapy/error.py | 4 +- quapy/evaluation.py | 91 ++++++++++++++++++++++++-- quapy/method/neural.py | 4 +- quapy/protocol.py | 13 ++-- quapy/tests/test_evaluation.py | 33 +++++++++- quapy/tests/test_protocols.py | 8 +-- 9 files changed, 143 insertions(+), 26 deletions(-) diff --git a/examples/explicit_loss_minimization.py b/examples/explicit_loss_minimization.py index cefbb3c..fcc07f3 100644 --- a/examples/explicit_loss_minimization.py +++ b/examples/explicit_loss_minimization.py @@ -2,7 +2,7 @@ import quapy as qp from quapy.method.aggregative import newELM from quapy.method.base import newOneVsAll from quapy.model_selection import GridSearchQ -from quapy.protocol import USimplexPP +from quapy.protocol import UPP """ In this example, we will show how to define a quantifier based on explicit loss minimization (ELM). 
@@ -57,7 +57,7 @@ param_grid = { 'binary_quantifier__classifier__C': [0.01, 1, 100], # classifier-dependent hyperparameter } print('starting model selection') -model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) +model_selection = GridSearchQ(quantifier, param_grid, protocol=UPP(val), verbose=True, refit=False) quantifier = model_selection.fit(train_modsel).best_model() print('training on the whole training set') @@ -65,7 +65,7 @@ train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle quantifier.fit(train) # evaluation -mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') +mae = qp.evaluation.evaluate(quantifier, protocol=UPP(test), error_metric='mae') print(f'MAE = {mae:.4f}') diff --git a/examples/one_vs_all.py b/examples/one_vs_all.py index 8aad376..3f5c4ac 100644 --- a/examples/one_vs_all.py +++ b/examples/one_vs_all.py @@ -2,7 +2,7 @@ import quapy as qp from quapy.method.aggregative import MS2 from quapy.method.base import newOneVsAll from quapy.model_selection import GridSearchQ -from quapy.protocol import USimplexPP +from quapy.protocol import UPP from sklearn.linear_model import LogisticRegression import numpy as np @@ -29,7 +29,7 @@ print(f'the quantifier is an instance of {quantifier.__class__.__name__}') train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test """ -model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the +model selection: for this example, we are relying on the UPP protocol, i.e., a variant of the artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors from the unit simplex. The priors are sampled using the Kraemer algorithm. Note this is in contrast to the standard APP protocol, which instead explores a prefixed grid of prevalence values. @@ -39,7 +39,7 @@ param_grid = { 'binary_quantifier__classifier__class_weight': ['balanced', None] # classifier-dependent hyperparameter } print('starting model selection') -model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False) +model_selection = GridSearchQ(quantifier, param_grid, protocol=UPP(val), verbose=True, refit=False) quantifier = model_selection.fit(train_modsel).best_model() print('training on the whole training set') @@ -47,7 +47,7 @@ train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle quantifier.fit(train) # evaluation -mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae') +mae = qp.evaluation.evaluate(quantifier, protocol=UPP(test), error_metric='mae') print(f'MAE = {mae:.4f}') diff --git a/quapy/CHANGE_LOG.txt b/quapy/CHANGE_LOG.txt index 48cb586..3dae8ca 100644 --- a/quapy/CHANGE_LOG.txt +++ b/quapy/CHANGE_LOG.txt @@ -3,7 +3,7 @@ Change Log 0.1.7 - Protocols are now abstracted as instances of AbstractProtocol. There is a new class extending AbstractProtocol called AbstractStochasticSeededProtocol, which implements a seeding policy to allow replicating the series of samplings. - There are some examples of protocols, APP, NPP, USimplexPP, DomainMixer (experimental). + There are some examples of protocols, APP, NPP, UPP, DomainMixer (experimental). The idea is to start the sampling by simply calling the __call__ method. 
This change has a great impact on the framework, since many functions in qp.evaluation, qp.model_selection, and sampling functions in LabelledCollection relied on the old functions. E.g., the functionality of diff --git a/quapy/error.py b/quapy/error.py index c0cd157..c1a8e7f 100644 --- a/quapy/error.py +++ b/quapy/error.py @@ -211,11 +211,13 @@ def __check_eps(eps=None): CLASSIFICATION_ERROR = {f1e, acce} QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld} +QUANTIFICATION_ERROR_SINGLE = {ae, rae, se, kld, nkld} QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, mkld, mnkld, mrae} CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR} QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR} +QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE} QUANTIFICATION_ERROR_SMOOTH_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SMOOTH} -ERROR_NAMES = CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES +ERROR_NAMES = CLASSIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_NAMES | QUANTIFICATION_ERROR_SINGLE_NAMES f1_error = f1e acc_error = acce diff --git a/quapy/evaluation.py b/quapy/evaluation.py index 4f5de10..0f94940 100644 --- a/quapy/evaluation.py +++ b/quapy/evaluation.py @@ -7,7 +7,34 @@ from quapy.method.base import BaseQuantifier import pandas as pd -def prediction(model: BaseQuantifier, protocol: AbstractProtocol, aggr_speedup='auto', verbose=False): +def prediction( + model: BaseQuantifier, + protocol: AbstractProtocol, + aggr_speedup: Union[str, bool] = 'auto', + verbose=False): + """ + Uses a quantification model to generate predictions for the samples generated via a specific protocol. + This function is central to all evaluation processes, and is endowed with an optimization to speed-up the + prediction of protocols that generate samples from a large collection. The optimization applies to aggregative + quantifiers only, and to OnLabelledCollection protocols, and comes down to generating the classification + predictions once and for all, and then generating samples over the classification predictions (instead of over + the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by + setting `aggr_speedup` to 'auto' or True, and is only carried out if the overall process is convenient in terms + of computations (e.g., if the number of classification predictions needed for the original collection exceeds the + number of classification predictions needed for all samples, then the optimization is not undertaken). + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also an instance of + :class:`quapy.protocol.OnLabelledCollection`, then the aggregation speed-up can be run. This is the protocol + in charge of generating the samples for which the model has to issue class prevalence predictions. + :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of + instances in the original collection on which the protocol acts is larger than the number of instances + in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is + convenient or not. Set to False to deactivate. 
+ :param verbose: boolean, show or not information in stdout + :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape + `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample + """ assert aggr_speedup in [False, True, 'auto', 'force'], 'invalid value for aggr_speedup' sout = lambda x: print(x) if verbose else None @@ -54,8 +81,29 @@ def __prediction_helper(quantification_fn, protocol: AbstractProtocol, verbose=F def evaluation_report(model: BaseQuantifier, protocol: AbstractProtocol, error_metrics: Iterable[Union[str,Callable]] = 'mae', - aggr_speedup='auto', + aggr_speedup: Union[str, bool] = 'auto', verbose=False): + """ + Generates a report (a pandas DataFrame) containing information on the evaluation of the model according + to a specific protocol and in terms of one or more evaluation metrics (errors). + + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also an instance of + :class:`quapy.protocol.OnLabelledCollection`, then the aggregation speed-up can be run. This is the protocol + in charge of generating the samples in which the model is evaluated. + :param error_metrics: a string, or list of strings, representing the name(s) of an error function in `qp.error` + (e.g., 'mae', the default value), or a callable function, or a list of callable functions, implementing + the error function itself. + :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of + instances in the original collection on which the protocol acts is larger than the number of instances + in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is + convenient or not. Set to False to deactivate. + :param verbose: boolean, show or not information in stdout + :return: a pandas DataFrame containing the columns 'true-prev' (the true prevalence of each sample), + 'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error metrics + have been indicated, each displaying the score in terms of that metric for every sample. + """ true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose) return _prevalence_report(true_prevs, estim_prevs, error_metrics) @@ -84,9 +132,28 @@ def _prevalence_report(true_prevs, estim_prevs, error_metrics: Iterable[Union[st def evaluate( model: BaseQuantifier, protocol: AbstractProtocol, - error_metric:Union[str, Callable], - aggr_speedup='auto', + error_metric: Union[str, Callable], + aggr_speedup: Union[str, bool] = 'auto', verbose=False): + """ + Evaluates a quantification model according to a specific sample generation protocol and in terms of one + evaluation metric (error). + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also an instance of + :class:`quapy.protocol.OnLabelledCollection`, then the aggregation speed-up can be run. This is the protocol + in charge of generating the samples in which the model is evaluated. + :param error_metric: a string representing the name of an error function in `qp.error` + (e.g., 'mae'), or a callable function implementing the error function itself. + :param aggr_speedup: whether or not to apply the speed-up. 
Set to "force" for applying it even if the number of + instances in the original collection on which the protocol acts is larger than the number of instances + in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is + convenient or not. Set to False to deactivate. + :param verbose: boolean, show or not information in stdout + :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with + the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns + a single float + """ if isinstance(error_metric, str): error_metric = qp.error.from_name(error_metric) @@ -96,9 +163,21 @@ def evaluate( def evaluate_on_samples( model: BaseQuantifier, - samples: [qp.data.LabelledCollection], - error_metric:Union[str, Callable], + samples: Iterable[qp.data.LabelledCollection], + error_metric: Union[str, Callable], verbose=False): + """ + Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error). + + :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier` + :param samples: a list of samples on which the quantifier is to be evaluated + :param error_metric: a string representing the name(s) of an error function in `qp.error` + (e.g., 'mae'), or a callable function implementing the error function itself. + :param verbose: boolean, show or not information in stdout + :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with + the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns + a single float + """ return evaluate(model, IterateProtocol(samples), error_metric, aggr_speedup=False, verbose=verbose) diff --git a/quapy/method/neural.py b/quapy/method/neural.py index e348930..e407aeb 100644 --- a/quapy/method/neural.py +++ b/quapy/method/neural.py @@ -6,7 +6,7 @@ import torch from torch.nn import MSELoss from torch.nn.functional import relu -from protocol import USimplexPP +from protocol import UPP from quapy.method.aggregative import * from quapy.util import EarlyStop @@ -218,7 +218,7 @@ class QuaNetTrainer(BaseQuantifier): self.quanet.train(mode=train) losses = [] mae_errors = [] - sampler = USimplexPP( + sampler = UPP( data, sample_size=self.sample_size, repeats=iterations, diff --git a/quapy/protocol.py b/quapy/protocol.py index 60df09c..a49bfe6 100644 --- a/quapy/protocol.py +++ b/quapy/protocol.py @@ -327,7 +327,7 @@ class NPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): return self.repeats -class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): +class UPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol): """ A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values, relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with @@ -348,7 +348,7 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) def __init__(self, data: LabelledCollection, sample_size=None, repeats=100, random_state=0, return_type='sample_prev'): - super(USimplexPP, self).__init__(random_state) + super(UPP, self).__init__(random_state) self.data = data self.sample_size = qp._get_sample_size(sample_size) self.repeats = repeats @@ -357,9 +357,9 @@ class USimplexPP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol) def samples_parameters(self): """ - Return all the necessary 
parameters to replicate the samples as according to the USimplexPP protocol. + Return all the necessary parameters to replicate the samples according to the UPP protocol. - :return: a list of indexes that realize the USimplexPP sampling + :return: a list of indexes that realize the UPP sampling """ indexes = [] for prevs in F.uniform_simplex_sampling(n_classes=self.data.n_classes, size=self.repeats): @@ -474,3 +474,8 @@ class DomainMixer(AbstractStochasticSeededProtocol): return self.repeats * len(self.mixture_points) +# aliases + +ArtificialPrevalenceProtocol = APP +NaturalPrevalenceProtocol = NPP +UniformPrevalenceProtocol = UPP \ No newline at end of file diff --git a/quapy/tests/test_evaluation.py b/quapy/tests/test_evaluation.py index db1ddc6..4992d86 100644 --- a/quapy/tests/test_evaluation.py +++ b/quapy/tests/test_evaluation.py @@ -1,8 +1,14 @@ import unittest + +import numpy as np + import quapy as qp from sklearn.linear_model import LogisticRegression from time import time -from quapy.method.aggregative import EMQ + +from error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \ + QUANTIFICATION_ERROR_SINGLE_NAMES +from quapy.method.aggregative import EMQ, PCC from quapy.method.base import BaseQuantifier @@ -48,6 +54,31 @@ class EvalTestCase(unittest.TestCase): self.assertEqual(tend_no_optim>(tend_optim/2), True) + def test_evaluation_output(self): + + data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=10, pickle=True) + train, test = data.training, data.test + + qp.environ['SAMPLE_SIZE']=100 + + protocol = qp.protocol.APP(test, random_state=0) + + q = PCC(LogisticRegression()).fit(train) + + single_errors = list(QUANTIFICATION_ERROR_SINGLE_NAMES) + averaged_errors = ['m'+e for e in single_errors] + single_errors = single_errors + [qp.error.from_name(e) for e in single_errors] + averaged_errors = averaged_errors + [qp.error.from_name(e) for e in averaged_errors] + for error_metric, averaged_error_metric in zip(single_errors, averaged_errors): + score = qp.evaluation.evaluate(q, protocol, error_metric=averaged_error_metric) + self.assertTrue(isinstance(score, float)) + + scores = qp.evaluation.evaluate(q, protocol, error_metric=error_metric) + self.assertTrue(isinstance(scores, np.ndarray)) + + self.assertEqual(scores.mean(), score) + + if __name__ == '__main__': unittest.main() diff --git a/quapy/tests/test_protocols.py b/quapy/tests/test_protocols.py index c7e4b15..6c76d4b 100644 --- a/quapy/tests/test_protocols.py +++ b/quapy/tests/test_protocols.py @@ -1,7 +1,7 @@ import unittest import numpy as np from quapy.data import LabelledCollection -from quapy.protocol import APP, NPP, USimplexPP, DomainMixer, AbstractStochasticSeededProtocol +from quapy.protocol import APP, NPP, UPP, DomainMixer, AbstractStochasticSeededProtocol def mock_labelled_collection(prefix=''): @@ -102,14 +102,14 @@ class TestProtocols(unittest.TestCase): def test_kraemer_replicate(self): data = mock_labelled_collection() - p = USimplexPP(data, sample_size=5, repeats=10, random_state=42) + p = UPP(data, sample_size=5, repeats=10, random_state=42) samples1 = samples_to_str(p) samples2 = samples_to_str(p) self.assertEqual(samples1, samples2) - p = USimplexPP(data, sample_size=5, repeats=10) # <- random_state is by default set to 0 + p = UPP(data, sample_size=5, repeats=10) # <- random_state is by default set to 0 samples1 = samples_to_str(p) samples2 = samples_to_str(p) @@ -118,7 +118,7 @@ class TestProtocols(unittest.TestCase): def test_kraemer_not_replicate(self): 
data = mock_labelled_collection() - p = USimplexPP(data, sample_size=5, repeats=10, random_state=None) + p = UPP(data, sample_size=5, repeats=10, random_state=None) samples1 = samples_to_str(p) samples2 = samples_to_str(p) From 49fc486c53c6d3d06717397674a76c7f06952706 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 14 Feb 2023 17:00:50 +0100 Subject: [PATCH 58/59] preparing to merge --- README.md | 31 +-- docs/build/html/Datasets.html | 2 +- docs/build/html/Evaluation.html | 211 +++++++----------- docs/build/html/Methods.html | 137 +++++++----- docs/build/html/Model-Selection.html | 137 ++++++------ docs/build/html/Plotting.html | 45 ++-- docs/build/html/_sources/Datasets.md.txt | 3 +- docs/build/html/_sources/Evaluation.md.txt | 207 ++++++----------- docs/build/html/_sources/Methods.md.txt | 134 ++++++----- .../html/_sources/Model-Selection.md.txt | 137 ++++++------ docs/build/html/_sources/Plotting.md.txt | 45 ++-- docs/build/html/_sources/index.rst.txt | 2 + docs/build/html/genindex.html | 45 +++- docs/build/html/index.html | 8 + docs/build/html/objects.inv | Bin 2822 -> 2928 bytes docs/build/html/quapy.data.html | 48 ++-- docs/build/html/quapy.html | 194 ++++++++++++++-- docs/build/html/quapy.method.html | 5 +- docs/build/html/searchindex.js | 2 +- examples/model_selection.py | 57 +++++ quapy/CHANGE_LOG.txt | 22 +- quapy/data/base.py | 70 ++++-- quapy/evaluation.py | 10 +- quapy/model_selection.py | 4 +- quapy/plot.py | 14 +- quapy/protocol.py | 9 + quapy/tests/test_labelcollection.py | 47 ++++ 27 files changed, 927 insertions(+), 699 deletions(-) create mode 100644 examples/model_selection.py diff --git a/README.md b/README.md index 10c769f..8f8b7de 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ for facilitating the analysis and interpretation of the experimental results. ### Last updates: +* Version 0.1.7 is released! Major changes can be consulted [here](quapy/CHANGE_LOG.txt). * A detailed documentation is now available [here](https://hlt-isti.github.io/QuaPy/) * The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/build/html/modules.html) @@ -59,13 +60,14 @@ See the [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki) for detailed examples. ## Features * Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization, -quantification methods based on structured output learning, HDy, QuaNet, and quantification ensembles). -* Versatile functionality for performing evaluation based on artificial sampling protocols. +quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others). +* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.). * Implementation of most commonly used evaluation metrics (e.g., AE, RAE, SE, KLD, NKLD, etc.). * Datasets frequently used in quantification (textual and numeric), including: * 32 UCI Machine Learning datasets. * 11 Twitter quantification-by-sentiment datasets. * 3 product reviews quantification-by-sentiment datasets. + * 4 tasks from the LeQua competition (_new in v0.1.7!_) * Native support for binary and single-label multiclass quantification scenarios. * Model selection functionality that minimizes quantification-oriented loss functions. * Visualization tools for analysing the experimental results (see the end-to-end example below). 
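For readers skimming the feature list above, the following end-to-end sketch ties the pieces together under the 0.1.7 API; the dataset and learner are illustrative choices, not prescribed by the README:

import quapy as qp
from quapy.method.aggregative import ACC
from quapy.protocol import APP
from sklearn.linear_model import LogisticRegression

# protocols read the sample size from the environment unless given explicitly
qp.environ['SAMPLE_SIZE'] = 100

# load a reviews dataset as tf-idf vectors, train an aggregative quantifier,
# and evaluate it over the samples generated by the APP protocol
train, test = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5).train_test
quantifier = ACC(LogisticRegression()).fit(train)
mae = qp.evaluation.evaluate(quantifier, protocol=APP(test), error_metric='mae')
print(f'MAE = {mae:.4f}')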
@@ -80,29 +82,6 @@ quantification methods based on structured output learning, HDy, QuaNet, and qua * pandas, xlrd * matplotlib -## SVM-perf with quantification-oriented losses -In order to run experiments involving SVM(Q), SVM(KLD), SVM(NKLD), -SVM(AE), or SVM(RAE), you have to first download the -[svmperf](http://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html) -package, apply the patch -[svm-perf-quantification-ext.patch](./svm-perf-quantification-ext.patch), and compile the sources. -The script [prepare_svmperf.sh](prepare_svmperf.sh) does all the job. Simply run: - -``` -./prepare_svmperf.sh -``` - -The resulting directory [svm_perf_quantification](./svm_perf_quantification) contains the -patched version of _svmperf_ with quantification-oriented losses. - -The [svm-perf-quantification-ext.patch](./svm-perf-quantification-ext.patch) is an extension of the patch made available by -[Esuli et al. 2015](https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0) -that allows SVMperf to optimize for -the _Q_ measure as proposed by [Barranquero et al. 2015](https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X) -and for the _KLD_ and _NKLD_ measures as proposed by [Esuli et al. 2015](https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0). -This patch extends the above one by also allowing SVMperf to optimize for -_AE_ and _RAE_. - ## Documentation @@ -113,6 +92,8 @@ are provided: * [Datasets](https://github.com/HLT-ISTI/QuaPy/wiki/Datasets) * [Evaluation](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) +* [Protocols](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols) * [Methods](https://github.com/HLT-ISTI/QuaPy/wiki/Methods) +* [SVMperf](https://github.com/HLT-ISTI/QuaPy/wiki/ExplicitLossMinimization) * [Model Selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection) * [Plotting](https://github.com/HLT-ISTI/QuaPy/wiki/Plotting) diff --git a/docs/build/html/Datasets.html b/docs/build/html/Datasets.html index 9c9eaa7..1636fa0 100644 --- a/docs/build/html/Datasets.html +++ b/docs/build/html/Datasets.html @@ -86,7 +86,7 @@ Take a look at the following code:

    sample = data.sampling(sample_size, *prev) print('instances:', sample.instances) -print('labels:', sample.labels) +print('labels:', sample.classes) print('prevalence:', F.strprev(sample.prevalence(), prec=2)) diff --git a/docs/build/html/Evaluation.html b/docs/build/html/Evaluation.html index 9252ff2..1b41a03 100644 --- a/docs/build/html/Evaluation.html +++ b/docs/build/html/Evaluation.html @@ -20,7 +20,7 @@ - +