Merge branch 'composable-doc' into devel

2024-07-02 12:15:34 +02:00 · 2024-07-02 12:15:34 +02:00 · e83966f1ff
parent 7fb41028d5 2dcc086ec2
commit e83966f1ff
5 changed files with 200 additions and 3 deletions
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -30,6 +30,7 @@ release = quapy.__version__
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

 extensions = [
+    'sphinx.ext.autosectionlabel',
    'sphinx.ext.duration',
    'sphinx.ext.doctest',
    'sphinx.ext.autodoc',
@ -40,6 +41,8 @@ extensions = [
    'myst_parser',
 ]

+autosectionlabel_prefix_document = True
+
 source_suffix = ['.rst', '.md']

 templates_path = ['_templates']
--- a/docs/source/manuals/methods.md
+++ b/docs/source/manuals/methods.md
@ -438,6 +438,65 @@ that can be explored in model selection range in [0.01, 0.25]. The methods' perf
 vary smoothly with smooth variations of this hyperparameter.


+## Composable Methods
+
+The [](quapy.method.composable) module allows the composition of quantification methods from loss functions and feature transformations. Any composed method solves a linear system of equations by minimizing the loss after transforming the data. Methods of this kind include ACC, PACC, HDx, HDy, and many other well-known methods, as well as an unlimited number of re-combinations of their building blocks.
+
+### Installation
+
+```sh
+pip install --upgrade pip setuptools wheel
+pip install "jax[cpu]"
+pip install "quapy[composable]"
+```
+
+### Basics
+
+The composition of a method is implemented through the [](quapy.method.composable.ComposableQuantifier) class. Its documentation also features an example to get you started in composing your own methods.
+
+```python
+ComposableQuantifier( # ordinal ACC, as proposed by Bunse et al., 2022
+  TikhonovRegularized(LeastSquaresLoss(), 0.01),
+  ClassTransformer(RandomForestClassifier(oob_score=True))
+)
+```
+
+More exhaustive examples of method compositions, including hyper-parameter optimization, can be found in [the example directory](https://github.com/HLT-ISTI/QuaPy/tree/master/examples).
+
+To implement your own loss functions and feature representations, follow the corresponding manual of the [qunfold package](https://github.com/mirkobunse/qunfold), which provides the back-end of QuaPy's composable module.
+
+### Loss functions
+
+- [](quapy.method.composable.LeastSquaresLoss)
+- [](quapy.method.composable.EnergyLoss)
+- [](quapy.method.composable.HellingerSurrogateLoss)
+- [](quapy.method.composable.BlobelLoss)
+- [](quapy.method.composable.CombinedLoss)
+
+```{hint}
+You can use the [](quapy.method.composable.CombinedLoss) to create arbitrary, weighted sums of losses and regularizers.
+```
+
+### Regularization functions
+
+- [](quapy.method.composable.TikhonovRegularized)
+- [](quapy.method.composable.TikhonovRegularization)
+
+### Feature transformations
+
+- [](quapy.method.composable.ClassTransformer)
+- [](quapy.method.composable.DistanceTransformer)
+- [](quapy.method.composable.HistogramTransformer)
+- [](quapy.method.composable.EnergyKernelTransformer)
+- [](quapy.method.composable.GaussianKernelTransformer)
+- [](quapy.method.composable.LaplacianKernelTransformer)
+- [](quapy.method.composable.GaussianRFFKernelTransformer)
+
+```{hint}
+The [](quapy.method.composable.ClassTransformer) requires the classifier to have a property `oob_score==True` and to produce a property `oob_decision_function_` during fitting. In [scikit-learn](https://scikit-learn.org/), this requirement is fulfilled by any bagging classifier, such as random forests. Any other classifier needs to be cross-validated through the [](quapy.method.composable.CVClassifier).
+```
+
+
 ## Meta Models

 By _meta_ models we mean quantification methods that are defined on top of other
--- a/examples/14.composable_methods.py
+++ b/examples/14.composable_methods.py
@ -0,0 +1,135 @@
+"""
+This example illustrates the composition of quantification methods from
+arbitrary loss functions and feature transformations. It will extend the basic
+example on the usage of quapy with this composition.
+"""
+
+import numpy as np
+import quapy as qp
+import quapy.functional as F
+
+# First of all, we load the same data as in the basic example.
+
+data = qp.data.preprocessing.text2tfidf(
+    qp.datasets.fetch_reviews("hp"),
+    min_df = 5,
+)
+training, testing = data.train_test
+
+# We start by recovering PACC from its building blocks, a LeastSquaresLoss and
+# a probabilistic ClassTransformer. A 5-fold cross-validation is implemented
+# through a CVClassifier.
+
+from quapy.method.composable import (
+    ComposableQuantifier,
+    LeastSquaresLoss,
+    ClassTransformer,
+    CVClassifier,
+)
+from sklearn.linear_model import LogisticRegression
+
+pacc = ComposableQuantifier(
+    LeastSquaresLoss(),
+    ClassTransformer(
+        CVClassifier(LogisticRegression(random_state=0), 5),
+        is_probabilistic = True
+    ),
+)
+
+# Let's evaluate this quantifier.
+
+print(f"Evaluating PACC: {pacc}")
+pacc.fit(training)
+app = qp.protocol.APP(testing, sample_size=100, n_prevalences=21, repeats=1)
+absolute_errors = qp.evaluation.evaluate(
+    model = pacc,
+    protocol = app,
+    error_metric = "ae",
+)
+print(f"MAE = {np.mean(absolute_errors):.4f}+-{np.std(absolute_errors):.4f}")
+
+# We now turn to the composition of novel methods. As an example, we use the
+# (squared) Hellinger distance as a loss function but, unlike HDy, we do not
+# compute any histograms from the output of the classifier.
+
+from quapy.method.composable import HellingerSurrogateLoss
+
+model = ComposableQuantifier(
+    HellingerSurrogateLoss(), # the loss is different from before
+    ClassTransformer( # we use the same transformer
+        CVClassifier(LogisticRegression(random_state=0), 5),
+        is_probabilistic = True
+    ),
+)
+
+print(f"Evaluating {model}")
+model.fit(training)
+absolute_errors = qp.evaluation.evaluate(
+    model = model,
+    protocol = app, # use the same protocol for evaluation
+    error_metric = "ae",
+)
+print(f"MAE = {np.mean(absolute_errors):.4f}+-{np.std(absolute_errors):.4f}")
+
+# In general, any composed method solves a linear system of equations by
+# minimizing the loss after transforming the data. Methods of this kind include
+# ACC, PACC, HDx, HDy, and many other well-known methods, as well as an
+# unlimited number of re-combinations of their building blocks.
+
+# To illustrate hyper-parameter optimization, we now define a method that
+# employs a weighted sum of the LeastSquaresLoss and the
+# HellingerSurrogateLoss. We will consider both the weighting of these losses
+# and the C parameter of the LogisticRegression as hyper-parameters to be
+# optimized.
+
+from quapy.method.composable import CombinedLoss
+
+# As with `pacc` above, the model is composed through QuaPy's own
+# ComposableQuantifier, so the qunfold back-end is not used directly.
+model = ComposableQuantifier(
+    CombinedLoss(HellingerSurrogateLoss(), LeastSquaresLoss()),
+    ClassTransformer(
+        CVClassifier(LogisticRegression(random_state=0), 5),
+        is_probabilistic = True
+    ),
+)
+
+# The names of the parameters stem from the comparatively deep object hierarchy
+# that composable methods define.
+
+param_grid = {
+    "loss__weights": [ (w, 1-w) for w in [.1, .5, .9] ],
+    "transformer__classifier__estimator__C": [1e-1, 1e1],
+}
+
+grid_search = qp.model_selection.GridSearchQ(
+    model = model,
+    param_grid = param_grid,
+    protocol = app, # use the protocol that we used for testing before
+    error = "mae",
+    refit = False,
+    verbose = True,
+).fit(training)
+print(
+    f"Best hyper-parameters = {grid_search.best_params_}",
+    f"Best MAE = {grid_search.best_score_}",
+    sep = "\n",
+)
+
+# Note that a proper evaluation would still require the best model to be
+# evaluated on a separate test set.
+
+# To implement your own loss functions and feature representations, please
+# follow the corresponding manual of the qunfold package. This package provides
+# the back-end of QuaPy’s composable module and is fully compatible with QuaPy.
+#
+# https://mirkobunse.github.io/qunfold/developer-guide.html#custom-implementations
--- a/quapy/method/composable.py
+++ b/quapy/method/composable.py
@ -56,7 +56,7 @@ def ComposableQuantifier(loss, transformer, **kwargs):
    Examples:
        Here, we create the ordinal variant of ACC (Bunse et al., 2023). This variant consists of the original feature transformation of ACC and of the original loss of ACC, the latter of which is regularized towards smooth solutions.

-            >>> from qunfold.method.composable import (
+            >>> from quapy.method.composable import (
            >>>     ComposableQuantifier,
            >>>     TikhonovRegularized,
            >>>     LeastSquaresLoss,
@ -80,7 +80,7 @@ def ComposableQuantifier(loss, transformer, **kwargs):
        
        To use a classifier that does not provide the `oob_score` argument, such as logistic regression, you have to configure a cross validation of this classifier. Here, we employ 10 cross validation folds. 5 folds are the default.

-            >>> from qunfold.method.composable import CVClassifier
+            >>> from quapy.method.composable import CVClassifier
            >>> from sklearn.linear_model import LogisticRegression
            >>> acc_lr = ComposableQuantifier(
            >>>     LeastSquaresLoss(),
--- a/setup.py
+++ b/setup.py
@ -125,7 +125,7 @@ setup(
    # projects.
    extras_require={  # Optional
       'bayes': ['jax', 'jaxlib', 'numpyro'],
-       'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.3'],
+       'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4'],
       'neural': ['torch'],
       'tests': ['certifi'],
       'docs' : ['sphinx-rtd-theme', 'myst-parser'],