231 lines
13 KiB
Python
231 lines
13 KiB
Python
from copy import deepcopy
|
|
from typing import Union, Callable, Iterable
|
|
import numpy as np
|
|
from tqdm import tqdm
|
|
import quapy as qp
|
|
from quapy.protocol import AbstractProtocol, OnLabelledCollectionProtocol, IterateProtocol
|
|
from quapy.method.base import BaseQuantifier
|
|
import pandas as pd
|
|
|
|
|
|
def prediction(
        model: BaseQuantifier,
        protocol: AbstractProtocol,
        aggr_speedup: Union[str, bool] = 'auto',
        verbose=False,
        verbose_error=None,
        n_jobs=1):
    """
    Issues class prevalence predictions, with a quantification model, for every sample generated by a protocol.

    All evaluation routines rely on this function. When the model is an aggregative quantifier and the protocol
    is an :class:`quapy.protocol.OnLabelledCollectionProtocol`, the function can classify the instances of the
    underlying collection only once and then let the protocol draw its samples from those classification
    predictions (rather than from the raw instances), so that only the aggregation step is run for each sample.
    With `aggr_speedup` set to 'auto' or True this shortcut is taken only if it saves classifier calls, i.e., if
    the collection has fewer instances than the total number of instances across all samples to be generated.

    :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
    :param protocol: :class:`quapy.protocol.AbstractProtocol` generating the samples for which the model has to
        issue class prevalence predictions; the aggregation speed-up is only considered if this object is also
        an instance of :class:`quapy.protocol.OnLabelledCollectionProtocol`
    :param aggr_speedup: whether or not to apply the speed-up. Use "force" to apply it even if the original
        collection has more instances than all the samples to be generated taken together. Use True or "auto"
        (default) to let QuaPy decide whether it is convenient or not. Use False to deactivate.
    :param verbose: boolean, show or not information in stdout
    :param verbose_error: an evaluation function to be used to display intermediate results if verbose=True
        (default None)
    :param n_jobs: number of parallel workers. Default is 1, meaning that the evaluation is carried out in a
        single core unless explicitly requested otherwise (the environment variable N_JOBS is not inspected),
        since the overhead of cloning objects for parallel execution is often not worth the effort.
    :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape
        `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample
    """
    assert aggr_speedup in [False, True, 'auto', 'force'], 'invalid value for aggr_speedup'

    def sout(msg):
        # messages are shown only in verbose mode
        if verbose:
            print(msg)

    apply_optimization = False

    if aggr_speedup in [True, 'auto', 'force']:
        # the shortcut requires an aggregative model and a protocol that samples from a labelled collection;
        # unless forced, it must also reduce the number of instances to classify
        from quapy.method.aggregative import AggregativeQuantifier

        speedup_supported = (
            isinstance(model, AggregativeQuantifier)
            and isinstance(protocol, OnLabelledCollectionProtocol)
        )
        if speedup_supported:
            if aggr_speedup == 'force':
                apply_optimization = True
                sout('forcing aggregative speedup')
            elif hasattr(protocol, 'sample_size'):
                nD = len(protocol.get_labelled_collection())
                samplesD = protocol.total() * protocol.sample_size
                if nD < samplesD:
                    apply_optimization = True
                    sout(f'speeding up the prediction for the aggregative quantifier, '
                         f'total classifications {nD} instead of {samplesD}')

    if not apply_optimization:
        return __prediction_helper(model, protocol, False, verbose, verbose_error, n_jobs)

    # classify the whole collection once; the samples are then drawn from these predictions
    pre_classified = model.classify(protocol.get_labelled_collection().instances)
    protocol_with_predictions = protocol.on_preclassified_instances(pre_classified)
    return __prediction_helper(model, protocol_with_predictions, True, verbose, verbose_error, n_jobs)
|
|
|
|
|
|
def __delayed_prediction(args):
    """
    Worker routine producing the prediction for one sample; used as the task function of the parallel run.

    :param args: a tuple `(quantifier, aggregate, sample_instances, sample_prev)`, where `aggregate` tells
        whether `quantifier.aggregate` (True) or `quantifier.quantify` (False) is applied to `sample_instances`
    :return: a tuple `(sample_prev, predicted)` with the prevalence received in `args` and the prevalence
        returned by the quantifier
    """
    model, use_aggregate, instances, true_prev = args
    # the prediction is issued by a private deep copy of the quantifier, never by the object received
    worker_model = deepcopy(model)
    if use_aggregate:
        estimate = worker_model.aggregate(instances)
    else:
        estimate = worker_model.quantify(instances)
    return true_prev, estimate
|
|
|
|
|
|
def __prediction_helper(quantifier, protocol: AbstractProtocol, aggregate: bool, verbose=False, verbose_error=None, n_jobs=1):
    """
    Iterates over the samples generated by the protocol and collects, for each of them, the true prevalence
    and the prevalence predicted by the quantifier.

    :param quantifier: the object issuing the predictions, via its `aggregate` or its `quantify` method
    :param protocol: a callable that, when invoked, yields pairs `(sample_instances, sample_prev)`; its
        `total()` method is used as the length of the progress bar
    :param aggregate: if True, predictions are obtained with `quantifier.aggregate`; otherwise with
        `quantifier.quantify`
    :param verbose: boolean, if True a progress bar is displayed while iterating over the samples
    :param verbose_error: a function called as `verbose_error(sample_prev, predicted)`; the mean of the values
        obtained so far is shown in the progress bar. It is only used if verbose=True and n_jobs=1
    :param n_jobs: if 1, the samples are processed sequentially in the current process; otherwise the
        predictions are delegated to `qp.util.parallel`
    :return: a tuple `(true_prevs, estim_prevs)` of numpy arrays with one entry per generated sample; both
        arrays are empty if the protocol generates no samples
    """
    true_prevs, estim_prevs = [], []

    # the protocol is invoked exactly once; in verbose mode its samples are consumed through a progress bar
    samples = protocol()
    pbar = None
    if verbose:
        pbar = tqdm(samples, total=protocol.total(), desc='predicting')
        samples = pbar

    if n_jobs == 1:
        quant_fn = quantifier.aggregate if aggregate else quantifier.quantify
        ongoing_errors = []
        for sample_instances, sample_prev in samples:
            predicted = quant_fn(sample_instances)
            estim_prevs.append(predicted)
            true_prevs.append(sample_prev)
            if verbose and verbose_error is not None:
                err = verbose_error(sample_prev, predicted)
                ongoing_errors.append(err)
                pbar.set_description(f'predicting: ongoing error={np.mean(ongoing_errors):.5f}')
    else:
        if verbose:
            print('parallelizing prediction')
        outputs = qp.util.parallel(
            __delayed_prediction,
            ((quantifier, aggregate, sample_X, sample_p) for (sample_X, sample_p) in samples),
            seed=qp.environ.get('_R_SEED', None),
            n_jobs=n_jobs
        )
        # transposing zero results yields an empty list that cannot be unpacked into two names; in that case
        # the empty lists initialised above are kept, which matches what the sequential branch returns
        transposed = list(zip(*outputs))
        if transposed:
            true_prevs, estim_prevs = transposed

    true_prevs = np.asarray(true_prevs)
    estim_prevs = np.asarray(estim_prevs)

    return true_prevs, estim_prevs
|
|
|
|
|
|
def evaluation_report(model: BaseQuantifier,
                      protocol: AbstractProtocol,
                      error_metrics: Iterable[Union[str,Callable]] = 'mae',
                      aggr_speedup: Union[str, bool] = 'auto',
                      verbose=False, verbose_error=None, n_jobs=1):
    """
    Evaluates the model on the samples generated by a protocol and returns a report (a pandas' DataFrame)
    with one row per sample and one column per requested evaluation metric (error).

    :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
    :param protocol: :class:`quapy.protocol.AbstractProtocol` generating the samples in which the model is
        evaluated; the aggregation speed-up can be run only if this object is also an instance of
        :class:`quapy.protocol.OnLabelledCollectionProtocol`
    :param error_metrics: a string, or list of strings, representing the name(s) of an error function in
        `qp.error` (e.g., 'mae', the default value), or a callable function, or a list of callable functions,
        implementing the error function itself.
    :param aggr_speedup: whether or not to apply the speed-up. Use "force" to apply it even if the original
        collection has more instances than all the samples to be generated taken together. Use True or "auto"
        (default) to let QuaPy decide whether it is convenient or not. Use False to deactivate.
    :param verbose: boolean, show or not information in stdout
    :param verbose_error: an evaluation function to be used to display intermediate results if verbose=True
        (default None)
    :param n_jobs: number of parallel workers. Default is 1, meaning that the evaluation is carried out in a
        single core unless explicitly requested otherwise (the environment variable N_JOBS is not inspected),
        since the overhead of cloning objects for parallel execution is often not worth the effort.
    :return: a pandas' DataFrame containing the columns 'true-prev' (the true prevalence of each sample),
        'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error
        metrics have been indicated, each displaying the score in terms of that metric for every sample.
    """
    # options forwarded untouched to the prediction routine
    prediction_options = dict(
        aggr_speedup=aggr_speedup,
        verbose=verbose,
        verbose_error=verbose_error,
        n_jobs=n_jobs,
    )
    true_prevs, estim_prevs = prediction(model, protocol, **prediction_options)
    return _prevalence_report(true_prevs, estim_prevs, error_metrics)
|
|
|
|
|
|
def _prevalence_report(true_prevs, estim_prevs, error_metrics: Iterable[Union[str, Callable]] = 'mae'):
    """
    Builds a per-sample report (a pandas' DataFrame) from true and estimated prevalence values.

    :param true_prevs: iterable with the true prevalence values of each sample
    :param estim_prevs: iterable with the estimated prevalence values of each sample, aligned with
        `true_prevs` (the two are iterated in parallel)
    :param error_metrics: a string, or list of strings, representing the name(s) of an error function in
        `qp.error` (e.g., 'mae', the default value), or a callable function, or a list of callable functions,
        implementing the error function itself. Every function is called as `f(true_prev, estim_prev)` on
        each sample, and its `__name__` is used as the name of the corresponding column.
    :return: a pandas' DataFrame containing the columns 'true-prev', 'estim-prev', and one column per error
        metric, with one row per sample; the DataFrame has no rows if no samples are given
    """
    # a lone metric, given either by name or as a function, is treated as a one-element list
    if isinstance(error_metrics, str) or callable(error_metrics):
        error_metrics = [error_metrics]

    error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics]
    assert all(callable(e) for e in error_funcs), 'invalid error functions'
    error_names = [e.__name__ for e in error_funcs]

    # rows are collected first and the DataFrame is built once at the end: DataFrame.append is not
    # available in pandas >= 2.0, and it used to copy the whole frame on every call
    rows = []
    for true_prev, estim_prev in zip(true_prevs, estim_prevs):
        row = {'true-prev': true_prev, 'estim-prev': estim_prev}
        for error_name, error_func in zip(error_names, error_funcs):
            row[error_name] = error_func(true_prev, estim_prev)
        rows.append(row)

    return pd.DataFrame(rows, columns=['true-prev', 'estim-prev'] + error_names)
|
|
|
|
|
|
def evaluate(
        model: BaseQuantifier,
        protocol: AbstractProtocol,
        error_metric: Union[str, Callable],
        aggr_speedup: Union[str, bool] = 'auto',
        verbose=False):
    """
    Computes the score of a quantification model, in terms of one evaluation metric (error), over the
    samples generated by a protocol.

    :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
    :param protocol: :class:`quapy.protocol.AbstractProtocol` generating the samples in which the model is
        evaluated; the aggregation speed-up can be run only if this object is also an instance of
        :class:`quapy.protocol.OnLabelledCollectionProtocol`
    :param error_metric: a string representing the name(s) of an error function in `qp.error`
        (e.g., 'mae'), or a callable function implementing the error function itself.
    :param aggr_speedup: whether or not to apply the speed-up. Use "force" to apply it even if the original
        collection has more instances than all the samples to be generated taken together. Use True or "auto"
        (default) to let QuaPy decide whether it is convenient or not. Use False to deactivate.
    :param verbose: boolean, show or not information in stdout
    :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)`
        with the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then
        returns a single float
    """
    # metric names are resolved through qp.error; anything else is used as given
    error_fn = qp.error.from_name(error_metric) if isinstance(error_metric, str) else error_metric
    true_prevs, estim_prevs = prediction(model, protocol, aggr_speedup=aggr_speedup, verbose=verbose)
    return error_fn(true_prevs, estim_prevs)
|
|
|
|
|
|
def evaluate_on_samples(
        model: BaseQuantifier,
        samples: Iterable[qp.data.LabelledCollection],
        error_metric: Union[str, Callable],
        verbose=False):
    """
    Computes the score of a quantification model, in terms of one evaluation metric (error), over an
    explicitly given set of samples.

    :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`
    :param samples: a list of samples on which the quantifier is to be evaluated
    :param error_metric: a string representing the name(s) of an error function in `qp.error`
        (e.g., 'mae'), or a callable function implementing the error function itself.
    :param verbose: boolean, show or not information in stdout
    :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)`
        with the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then
        returns a single float
    """
    # the samples are wrapped in an IterateProtocol and evaluated with the aggregation speed-up disabled
    samples_protocol = IterateProtocol(samples)
    return evaluate(
        model=model,
        protocol=samples_protocol,
        error_metric=error_metric,
        aggr_speedup=False,
        verbose=verbose,
    )
|
|
|
|
|
|
|
|
|
|
|