2020-12-03 18:12:28 +01:00
|
|
|
from abc import ABCMeta, abstractmethod
|
2022-05-26 17:59:23 +02:00
|
|
|
from copy import deepcopy
|
2023-01-27 18:13:23 +01:00
|
|
|
|
|
|
|
from sklearn.base import BaseEstimator
|
|
|
|
|
2022-06-14 09:35:39 +02:00
|
|
|
import quapy as qp
|
2021-01-15 18:32:32 +01:00
|
|
|
from quapy.data import LabelledCollection
|
2020-12-03 18:12:28 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Base Quantifier abstract class
|
|
|
|
# ------------------------------------
|
2023-01-27 18:13:23 +01:00
|
|
|
class BaseQuantifier(BaseEstimator):
    """
    Abstract Quantifier. A quantifier is any object whose class implements :meth:`fit` (taking a
    :class:`quapy.data.base.LabelledCollection`) and :meth:`quantify`, together with :meth:`set_params` and
    :meth:`get_params`, which are needed for model selection (see :meth:`quapy.model_selection.GridSearchQ`).

    NOTE(review): this class does not declare an ABC metaclass itself, so the `@abstractmethod` markers below
    are presumably informative only and not enforced at instantiation time -- confirm whether that is intended.
    """

    @abstractmethod
    def fit(self, data: LabelledCollection):
        """
        Trains a quantifier.

        :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
        :return: self
        """

    @abstractmethod
    def quantify(self, instances):
        """
        Generate class prevalence estimates for the sample's instances.

        :param instances: array-like
        :return: `np.ndarray` of shape `(self.n_classes_,)` with class prevalence estimates.
        """
|
2020-12-03 18:12:28 +01:00
|
|
|
|
2023-01-27 18:13:23 +01:00
|
|
|
# @abstractmethod
|
|
|
|
# def set_params(self, **parameters):
|
|
|
|
# """
|
|
|
|
# Set the parameters of the quantifier.
|
|
|
|
#
|
|
|
|
# :param parameters: dictionary of param-value pairs
|
|
|
|
# """
|
|
|
|
# ...
|
|
|
|
#
|
|
|
|
# @abstractmethod
|
|
|
|
# def get_params(self, deep=True):
|
|
|
|
# """
|
|
|
|
# Return the current parameters of the quantifier.
|
|
|
|
#
|
|
|
|
# :param deep: for compatibility with sklearn
|
|
|
|
# :return: a dictionary of param-value pairs
|
|
|
|
# """
|
|
|
|
# ...
|
2020-12-03 18:12:28 +01:00
|
|
|
|
|
|
|
|
2021-01-06 14:58:29 +01:00
|
|
|
class BinaryQuantifier(BaseQuantifier):
    """
    Abstract class of binary quantifiers, i.e., quantifiers that estimate class prevalence values for exactly
    two classes (typically, to be interpreted as one class and its complement).
    """

    def _check_binary(self, data: LabelledCollection, quantifier_name):
        """
        Asserts that the given collection is a binary problem.

        :param data: the collection to check; its `binary` attribute is inspected
        :param quantifier_name: name of the quantifier, used only to build the error message
        :raises AssertionError: if `data.binary` is falsy
        """
        assert data.binary, (
            f'{quantifier_name} works only on problems of binary classification. '
            f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'
        )
|
|
|
|
|
2022-06-14 09:35:39 +02:00
|
|
|
|
2022-05-26 17:59:23 +02:00
|
|
|
class OneVsAllGeneric:
    """
    Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
    quantifier for each class, and then l1-normalizes the outputs so that the class prevalence values sum up to 1.

    :param binary_quantifier: an instance of :class:`BaseQuantifier`; it is deep-copied once per class when
        :meth:`fit` is called, so the instance passed here is never trained itself
    :param n_jobs: number of parallel workers; the value is resolved through `qp.get_njobs`
    """

    def __init__(self, binary_quantifier, n_jobs=None):
        assert isinstance(binary_quantifier, BaseQuantifier), \
            f'{binary_quantifier} does not seem to be a Quantifier'
        self.binary_quantifier = binary_quantifier
        self.n_jobs = qp.get_njobs(n_jobs)

    def fit(self, data: 'LabelledCollection', **kwargs):
        """
        Trains one copy of the binary quantifier per class, each on a one-vs-rest relabelling of `data`.

        :param data: a non-binary :class:`quapy.data.base.LabelledCollection` with the training data
        :param kwargs: extra keyword arguments forwarded to the `fit` method of every binary quantifier
        :return: self
        :raises AssertionError: if `data` is binary
        """
        assert not data.binary, \
            f'{self.__class__.__name__} expect non-binary data'
        # One independent (deep) copy per class, keyed by the class label.
        self.class_quatifier = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
        # NOTE(review): `Parallel`/`delayed` (joblib) and `np` are used by this class but are not visible in
        # this file's import block -- confirm that they are imported at module level.
        Parallel(n_jobs=self.n_jobs, backend='threading')(
            delayed(self._delayed_binary_fit)(c, self.class_quatifier, data, **kwargs) for c in data.classes_
        )
        return self

    def quantify(self, X, *args):
        """
        Estimates class prevalence values by querying every per-class binary quantifier and normalizing the
        resulting positive-class estimates.

        :param X: the instances of the sample, passed as-is to every binary quantifier
        :param args: ignored; accepted only so that existing callers passing extra positional arguments keep working
        :return: the normalized prevalence estimates, one per class, following the order of :attr:`classes`
        """
        prevalences = np.asarray(
            Parallel(n_jobs=self.n_jobs, backend='threading')(
                delayed(self._delayed_binary_predict)(c, self.class_quatifier, X) for c in self.classes
            )
        )
        # `qp` is the module alias this file imports; assumes `quapy` exposes its `functional` submodule.
        return qp.functional.normalize_prevalence(prevalences)

    @property
    def classes(self):
        """
        The class labels seen during :meth:`fit`, in sorted order.
        """
        return sorted(self.class_quatifier.keys())

    def set_params(self, **parameters):
        """
        Sets the hyper-parameters of the underlying binary quantifier.

        :param parameters: param-value pairs, forwarded to `binary_quantifier.set_params`
        :return: self (as scikit-learn estimators do)
        """
        self.binary_quantifier.set_params(**parameters)
        return self

    def get_params(self, deep=True):
        """
        Returns the hyper-parameters of the underlying binary quantifier.

        :param deep: accepted for compatibility with sklearn; it is not forwarded to the binary quantifier
        :return: whatever `binary_quantifier.get_params()` returns
        """
        return self.binary_quantifier.get_params()

    def _delayed_binary_predict(self, c, quantifiers, X):
        """
        Returns the positive-class prevalence estimated by the binary quantifier of class `c`.

        :param c: the class label
        :param quantifiers: dictionary mapping class labels to fitted binary quantifiers
        :param X: the instances of the sample
        :return: the entry at index 1 (positive class) along the last axis of the binary estimate
        """
        estimate = np.asarray(quantifiers[c].quantify(X))
        # A binary quantifier returns a 1-D vector (negative, positive), for which `[:, 1]` raises IndexError.
        # Indexing the last axis handles that case and still yields the same column for 2-D outputs.
        return estimate[..., 1]

    def _delayed_binary_fit(self, c, quantifiers, data, **kwargs):
        """
        Fits the binary quantifier of class `c` on the "class `c` vs. rest" version of `data`.

        :param c: the class label acting as the positive class
        :param quantifiers: dictionary mapping class labels to (unfitted) binary quantifiers
        :param data: the multi-class training collection
        :param kwargs: extra keyword arguments forwarded to the `fit` method of the binary quantifier
        """
        # The boolean mask `labels == c` is used as the binary labelling.
        bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
        quantifiers[c].fit(bindata, **kwargs)
|
2020-12-11 19:28:17 +01:00
|
|
|
|
|
|
|
|