Added NAE, NRAE

Andrea Esuli 2023-11-03 15:45:46 +01:00
parent e71f82105e
commit 69e78edbee
5 changed files with 80 additions and 7 deletions

View File

@@ -76,7 +76,7 @@ See the [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki) for detailed examples.
* Implementation of many popular quantification methods (Classify-&-Count and its variants, Expectation Maximization,
quantification methods based on structured output learning, HDy, QuaNet, quantification ensembles, among others).
* Versatile functionality for performing evaluation based on sampling generation protocols (e.g., APP, NPP, etc.).
-* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, SE, KLD, NKLD, etc.).
+* Implementation of most commonly used evaluation metrics (e.g., AE, RAE, NAE, NRAE, SE, KLD, NKLD, etc.).
* Datasets frequently used in quantification (textual and numeric), including:
  * 32 UCI Machine Learning datasets.
  * 11 Twitter quantification-by-sentiment datasets.

View File

@@ -33,7 +33,6 @@ Refactor protocols. APP and NPP related functionalities are duplicated in functi
New features:
==========================================
-Add NAE, NRAE
Add "measures for evaluating ordinal"?
Add datasets for topic.
Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top?

View File

@@ -1,4 +1,4 @@
-Change Log 0.1.7
+Change Log 0.1.8
----------------
- New UCI multiclass datasets added (thanks to Pablo González). The 5 UCI multiclass datasets are those corresponding
@@ -7,6 +7,7 @@ Change Log 0.1.7
- >2 classes
- classification datasets
- Python API available
+- Added NAE, NRAE
Change Log 0.1.7
----------------

View File

@@ -70,6 +70,32 @@ def ae(prevs, prevs_hat):
    return abs(prevs_hat - prevs).mean(axis=-1)
+
+
+def nae(prevs, prevs_hat):
+    """Computes the normalized absolute error between the two prevalence vectors.
+    Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as
+    :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,
+    where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`
+    are the classes of interest.
+
+    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+    :return: normalized absolute error
+    """
+    assert prevs.shape == prevs_hat.shape, f'wrong shape {prevs.shape} vs. {prevs_hat.shape}'
+    return abs(prevs_hat - prevs).sum(axis=-1)/(2*(1-prevs.min(axis=-1)))
+
+
+def mnae(prevs, prevs_hat):
+    """Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+        prevalence values
+    :return: mean normalized absolute error
+    """
+    return nae(prevs, prevs_hat).mean()
+
+
def mse(prevs, prevs_hat):
    """Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.
@@ -216,6 +242,49 @@ def rae(prevs, prevs_hat, eps=None):
    return (abs(prevs - prevs_hat) / prevs).mean(axis=-1)
+
+
+def nrae(prevs, prevs_hat, eps=None):
+    """Computes the normalized relative absolute error between the two prevalence vectors.
+    Normalized relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`
+    is computed as
+    :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,
+    where
+    :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`
+    and :math:`\\mathcal{Y}` are the classes of interest.
+    The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).
+
+    :param prevs: array-like of shape `(n_classes,)` with the true prevalence values
+    :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values
+    :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution
+        contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the
+        sample size. If `eps=None`, the sample size will be taken from the environment variable
+        `SAMPLE_SIZE` (which has thus to be set beforehand).
+    :return: normalized relative absolute error
+    """
+    eps = __check_eps(eps)
+    prevs = smooth(prevs, eps)
+    prevs_hat = smooth(prevs_hat, eps)
+    min_p = prevs.min(axis=-1)
+    return (abs(prevs - prevs_hat) / prevs).sum(axis=-1)/(prevs.shape[-1]-1+(1-min_p)/min_p)
+
+
+def mnrae(prevs, prevs_hat, eps=None):
+    """Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across
+    the sample pairs. The distributions are smoothed using the `eps` factor (see
+    :meth:`quapy.error.smooth`).
+
+    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true
+        prevalence values
+    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted
+        prevalence values
+    :param eps: smoothing factor. `mnrae` is not defined in cases in which the true
+        distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,
+        with :math:`T` the sample size. If `eps=None`, the sample size will be taken from
+        the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).
+    :return: mean normalized relative absolute error
+    """
+    return nrae(prevs, prevs_hat, eps).mean()
+
+
def smooth(prevs, eps):
    """ Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:
    :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+
@@ -239,9 +308,9 @@ def __check_eps(eps=None):
CLASSIFICATION_ERROR = {f1e, acce}
-QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld}
-QUANTIFICATION_ERROR_SINGLE = {ae, rae, se, kld, nkld}
-QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, mkld, mnkld, mrae}
+QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
+QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
+QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
QUANTIFICATION_ERROR_SINGLE_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR_SINGLE}
@@ -255,3 +324,7 @@ mean_absolute_error = mae
absolute_error = ae
mean_relative_absolute_error = mrae
relative_absolute_error = rae
+normalized_absolute_error = nae
+normalized_relative_absolute_error = nrae
+mean_normalized_absolute_error = mnae
+mean_normalized_relative_absolute_error = mnrae
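
The nae/nrae implementations above follow the formulas given in their docstrings directly. As a quick sanity check, here is a minimal, self-contained sketch (NumPy only, with made-up prevalence vectors and a hypothetical sample size T=100; not part of the commit) that reproduces NAE and the smoothed NRAE by hand:

import numpy as np

def smooth(prevs, eps):
    # same smoothing rule as quapy.error.smooth: (eps + p(y)) / (eps*|Y| + 1)
    return (prevs + eps) / (eps * len(prevs) + 1)

# Made-up true and predicted prevalence vectors over three classes (illustration only).
p     = np.asarray([0.5, 0.3, 0.2])
p_hat = np.asarray([0.4, 0.4, 0.2])

# NAE: sum of absolute differences normalized by 2*(1 - min true prevalence),
# which equals AE(p, p_hat) / z_AE from the docstring above.
nae = np.abs(p_hat - p).sum() / (2 * (1 - p.min()))
print(nae)  # 0.125

# NRAE: smooth both vectors with eps = 1/(2T), compute RAE, then divide by z_RAE.
eps = 1 / (2 * 100)
ps, ps_hat = smooth(p, eps), smooth(p_hat, eps)
rae = (np.abs(ps - ps_hat) / ps).mean()
z_rae = (len(ps) - 1 + (1 - ps.min()) / ps.min()) / len(ps)
print(rae / z_rae)  # ~0.088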

View File

@@ -6,7 +6,7 @@ import quapy as qp
from sklearn.linear_model import LogisticRegression
from time import time
-from error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
+from quapy.error import QUANTIFICATION_ERROR_SINGLE, QUANTIFICATION_ERROR, QUANTIFICATION_ERROR_NAMES, \
    QUANTIFICATION_ERROR_SINGLE_NAMES
from quapy.method.aggregative import EMQ, PCC
from quapy.method.base import BaseQuantifier
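
For completeness, a hedged usage sketch (assuming a QuaPy installation that already includes this commit; otherwise these names do not exist yet): the new measures are registered in the error sets and are also reachable through the long-name aliases added above.

import numpy as np
import quapy.error as err

# The new short names should appear in the registries updated by this commit.
assert {'mnae', 'mnrae'} <= err.QUANTIFICATION_ERROR_NAMES
assert {'nae', 'nrae'} <= err.QUANTIFICATION_ERROR_SINGLE_NAMES

# Made-up batch of two (true, predicted) prevalence pairs, for illustration only.
P     = np.asarray([[0.5, 0.3, 0.2], [0.1, 0.1, 0.8]])
P_hat = np.asarray([[0.4, 0.4, 0.2], [0.2, 0.1, 0.7]])

print(err.mnae(P, P_hat))                            # mean NAE over the two samples
print(err.mean_normalized_absolute_error(P, P_hat))  # same value via the new alias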