Composable methods integrated from qunfold, which is an extra dependency for quapy.method.composable
This commit is contained in:
parent
a64620c377
commit
2000c33372
|
@ -52,6 +52,14 @@ quapy.method.non\_aggregative module
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
quapy.method.composable module
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
.. automodule:: quapy.method.composable
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
"""This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
|
||||||
|
|
||||||
|
import qunfold
|
||||||
|
from qunfold.quapy import QuaPyWrapper
|
||||||
|
from qunfold.sklearn import CVClassifier
|
||||||
|
from qunfold import (
|
||||||
|
LeastSquaresLoss, # losses
|
||||||
|
BlobelLoss,
|
||||||
|
EnergyLoss,
|
||||||
|
HellingerSurrogateLoss,
|
||||||
|
CombinedLoss,
|
||||||
|
TikhonovRegularization,
|
||||||
|
TikhonovRegularized,
|
||||||
|
ClassTransformer, # transformers
|
||||||
|
HistogramTransformer,
|
||||||
|
DistanceTransformer,
|
||||||
|
KernelTransformer,
|
||||||
|
EnergyKernelTransformer,
|
||||||
|
LaplacianKernelTransformer,
|
||||||
|
GaussianKernelTransformer,
|
||||||
|
GaussianRFFKernelTransformer,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Public members of this module, e.g., for auto-documentation in sphinx.
# QuaPyWrapper is imported above but deliberately left out of this list.
__all__ = [
    # factory for composed quantifiers and the cross-validating classifier wrapper
    "ComposableQuantifier", "CVClassifier",
    # losses
    "LeastSquaresLoss", "BlobelLoss", "EnergyLoss", "HellingerSurrogateLoss",
    "CombinedLoss", "TikhonovRegularization", "TikhonovRegularized",
    # transformers
    "ClassTransformer", "HistogramTransformer", "DistanceTransformer",
    "KernelTransformer", "EnergyKernelTransformer", "LaplacianKernelTransformer",
    "GaussianKernelTransformer", "GaussianRFFKernelTransformer",
]
|
||||||
|
|
||||||
|
def ComposableQuantifier(loss, transformer, **kwargs):
    """A generic quantification / unfolding method that solves a linear system of equations.

    The returned method represents any quantifier that can be described in terms of a loss function, a feature transformation, and a regularization term. In this implementation, the loss is minimized through unconstrained second-order minimization. Valid probability estimates are ensured through a soft-max trick by Bunse (2022).

    Args:
        loss: An instance of a loss class from `quapy.method.composable`.
        transformer: An instance of a transformer class from `quapy.method.composable`.
        solver (optional): The `method` argument in `scipy.optimize.minimize`. Defaults to `"trust-ncg"`.
        solver_options (optional): The `options` argument in `scipy.optimize.minimize`. Defaults to `{"gtol": 1e-8, "maxiter": 1000}`.
        seed (optional): A random number generator seed from which a numpy RandomState is created. Defaults to `None`.

        All optional arguments are collected in `**kwargs` and forwarded unchanged to `qunfold.GenericMethod`.

    Returns:
        The `qunfold.GenericMethod` composed from `loss` and `transformer`, wrapped in a `QuaPyWrapper`.

    Examples:
        Here, we create the ordinal variant of ACC (Bunse et al., 2023). This variant consists of the original feature transformation of ACC and of the original loss of ACC, the latter of which is regularized towards smooth solutions.

            >>> from quapy.method.composable import (
            ...     ComposableQuantifier,
            ...     TikhonovRegularized,
            ...     LeastSquaresLoss,
            ...     ClassTransformer,
            ... )
            >>> from sklearn.ensemble import RandomForestClassifier
            >>> o_acc = ComposableQuantifier(
            ...     TikhonovRegularized(LeastSquaresLoss(), 0.01),
            ...     ClassTransformer(RandomForestClassifier(oob_score=True))
            ... )

        Here, we perform hyper-parameter optimization with the ordinal ACC.

            >>> quapy.model_selection.GridSearchQ(
            ...     model = o_acc,
            ...     param_grid = { # try both splitting criteria
            ...         "transformer__classifier__estimator__criterion": ["gini", "entropy"],
            ...     },
            ...     # ...
            ... )

        To use a classifier that does not provide the `oob_score` argument, such as logistic regression, you have to configure a cross validation of this classifier. Here, we employ 10 cross validation folds. 5 folds are the default.

            >>> from quapy.method.composable import CVClassifier
            >>> from sklearn.linear_model import LogisticRegression
            >>> acc_lr = ComposableQuantifier(
            ...     LeastSquaresLoss(),
            ...     ClassTransformer(CVClassifier(LogisticRegression(), 10))
            ... )
    """
    # Compose the qunfold method first, then wrap it in qunfold's QuaPyWrapper.
    composed_method = qunfold.GenericMethod(loss, transformer, **kwargs)
    return QuaPyWrapper(composed_method)
|
|
@ -9,6 +9,29 @@ from quapy.method.meta import Ensemble
|
||||||
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
|
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
|
||||||
from quapy.functional import check_prevalence_vector
|
from quapy.functional import check_prevalence_vector
|
||||||
|
|
||||||
|
# a random selection of composed methods to test the qunfold integration
|
||||||
|
from quapy.method.composable import (
|
||||||
|
ComposableQuantifier,
|
||||||
|
LeastSquaresLoss,
|
||||||
|
HellingerSurrogateLoss,
|
||||||
|
ClassTransformer,
|
||||||
|
HistogramTransformer,
|
||||||
|
CVClassifier,
|
||||||
|
)
|
||||||
|
# Composed quantifiers exercised by the qunfold integration test.
COMPOSABLE_METHODS = [
    # ACC: least-squares loss on the outputs of a cross-validated classifier
    ComposableQuantifier(
        LeastSquaresLoss(),
        ClassTransformer(CVClassifier(LogisticRegression())),
    ),
    # HDy: Hellinger surrogate loss on histograms (3 bins per class) of the
    # outputs of a cross-validated classifier
    ComposableQuantifier(
        HellingerSurrogateLoss(),
        HistogramTransformer(
            3,
            preprocessor=ClassTransformer(CVClassifier(LogisticRegression())),
        ),
    ),
]
|
||||||
|
|
||||||
class TestMethods(unittest.TestCase):
|
class TestMethods(unittest.TestCase):
|
||||||
|
|
||||||
tiny_dataset_multiclass = qp.datasets.fetch_UCIMulticlassDataset('academic-success').reduce(n_test=10)
|
tiny_dataset_multiclass = qp.datasets.fetch_UCIMulticlassDataset('academic-success').reduce(n_test=10)
|
||||||
|
@ -87,6 +110,14 @@ class TestMethods(unittest.TestCase):
|
||||||
estim_prevalences = model.quantify(dataset.test.instances)
|
estim_prevalences = model.quantify(dataset.test.instances)
|
||||||
self.assertTrue(check_prevalence_vector(estim_prevalences))
|
self.assertTrue(check_prevalence_vector(estim_prevalences))
|
||||||
|
|
||||||
|
def test_composable(self):
    """Fit every composed method on every test dataset and validate its output.

    Each quantifier in COMPOSABLE_METHODS is fitted on the training part of
    each dataset in TestMethods.datasets; the prevalences it estimates for
    the test instances must pass check_prevalence_vector.
    """
    # Lazy pairing keeps the original order: all methods for the first
    # dataset, then all methods for the next one, and so on.
    combos = (
        (data, quantifier)
        for data in TestMethods.datasets
        for quantifier in COMPOSABLE_METHODS
    )
    for data, quantifier in combos:
        print('testing', quantifier)
        quantifier.fit(data.training)
        predicted = quantifier.quantify(data.test.X)
        self.assertTrue(check_prevalence_vector(predicted))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -125,7 +125,9 @@ setup(
|
||||||
# projects.
|
# projects.
|
||||||
extras_require={ # Optional
|
extras_require={ # Optional
|
||||||
'bayes': ['jax', 'jaxlib', 'numpyro'],
|
'bayes': ['jax', 'jaxlib', 'numpyro'],
|
||||||
|
'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'],
|
||||||
'tests': ['certifi'],
|
'tests': ['certifi'],
|
||||||
|
'docs' : ['sphinx-rtd-theme'],
|
||||||
},
|
},
|
||||||
|
|
||||||
# If there are data files included in your packages that need to be
|
# If there are data files included in your packages that need to be
|
||||||
|
|
Loading…
Reference in New Issue