# QuaPy/quapy/evaluation.py

from typing import Union, Callable, Iterable

import numpy as np
from joblib import Parallel, delayed
from tqdm import tqdm

import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier
from quapy.util import temp_seed
import quapy.functional as F
import pandas as pd

def artificial_sampling_prediction(
        model: BaseQuantifier,
        test: LabelledCollection,
        sample_size,
        n_prevpoints=210,
        n_repetitions=1,
        n_jobs=1,
        random_seed=42,
        verbose=True
):
    """
    Runs the model over every sample produced by the artificial sampling protocol and collects, for each
    sample, its true class prevalence together with the prevalence estimated by the model.

    :param model: the quantifier that produces the class prevalence estimations
    :param test: the test collection from which the samples are drawn
    :param sample_size: the size of each sample
    :param n_prevpoints: the number of different prevalences to sample
    :param n_repetitions: the number of repetitions for each prevalence
    :param n_jobs: number of jobs to be run in parallel
    :param random_seed: seed used while generating the sampling indexes, so that the samplings can be
     replicated. The seed is applied only within this function (via `temp_seed`).
    :param verbose: if True, shows a progress bar
    :return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the
     number of classes. The first one contains the true prevalences of the generated samples, the second one
     contains the corresponding prevalence estimations
    """

    # All sampling indexes are materialized up-front, under the local seed, so that the
    # (possibly parallel) prediction step below does not consume any randomness.
    with temp_seed(random_seed):
        sampling_indexes = list(
            test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions)
        )

    if not model.aggregative:
        # non-aggregative quantifier: every sample goes through the full quantify() call
        quantification_func = model.quantify
    else:
        # aggregative quantifier: pre-classify the whole test set once and then sample over the
        # classifier outputs, so that only aggregate() is invoked for each sample
        quantification_func = model.aggregate
        preclassify = model.posterior_probabilities if model.probabilistic else model.classify
        test = LabelledCollection(preclassify(test.instances), test.labels)

    def _predict_prevalences(index):
        # returns (true prevalence, estimated prevalence) for the sample selected by `index`
        drawn = test.sampling_from_index(index)
        return drawn.prevalence(), quantification_func(drawn.instances)

    if verbose:
        iterable = tqdm(sampling_indexes, desc='[artificial sampling protocol] predicting')
    else:
        iterable = sampling_indexes
    outcomes = qp.util.parallel(_predict_prevalences, iterable, n_jobs=n_jobs)

    # transpose the list of (true, estimated) pairs into two parallel sequences
    true_part, estim_part = zip(*outcomes)

    return np.asarray(true_part), np.asarray(estim_part)


def artificial_sampling_report(
        model: BaseQuantifier,
        test: LabelledCollection,
        sample_size,
        n_prevpoints=210,
        n_repetitions=1,
        n_jobs=1,
        random_seed=42,
        error_metrics:Iterable[Union[str,Callable]]='mae',
        verbose=True):
    """
    Builds a per-sample report of the artificial sampling protocol as a pandas DataFrame.

    :param model: the quantifier that produces the class prevalence estimations
    :param test: the test collection from which the samples are drawn
    :param sample_size: the size of each sample
    :param n_prevpoints: the number of different prevalences to sample
    :param n_repetitions: the number of repetitions for each prevalence
    :param n_jobs: number of jobs to be run in parallel
    :param random_seed: seed forwarded to `artificial_sampling_prediction`
    :param error_metrics: a single error name, or an iterable whose items are error names (resolved through
     `qp.error.from_name`) or callables taking (true_prev, estim_prev). One-shot iterables such as
     generators are accepted.
    :param verbose: if True, shows a progress bar
    :return: a DataFrame with one row per sample and columns 'true-prev', 'estim-prev', followed by one
     column per error metric (named after the string, or after the callable's `__name__`)
    """

    if isinstance(error_metrics, str):
        error_metrics = [error_metrics]
    else:
        # The metrics are traversed twice below (names, then functions); materializing them
        # keeps a one-shot iterable from being exhausted after the first traversal.
        error_metrics = list(error_metrics)

    error_names = [e if isinstance(e, str) else e.__name__ for e in error_metrics]
    error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics]
    assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions'

    columns = ['true-prev', 'estim-prev'] + error_names
    true_prevs, estim_prevs = artificial_sampling_prediction(
        model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose
    )

    # Rows are accumulated in a plain list and the frame is built once at the end:
    # DataFrame.append copied the whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for true_prev, estim_prev in zip(true_prevs, estim_prevs):
        row = {'true-prev': true_prev, 'estim-prev': estim_prev}
        for error_name, error_metric in zip(error_names, error_funcs):
            row[error_name] = error_metric(true_prev, estim_prev)
        rows.append(row)

    # passing `columns` keeps the column order (and the columns themselves when there are no rows)
    return pd.DataFrame(rows, columns=columns)


def artificial_sampling_eval(
        model: BaseQuantifier,
        test: LabelledCollection,
        sample_size,
        n_prevpoints=210,
        n_repetitions=1,
        n_jobs=1,
        random_seed=42,
        error_metric:Union[str,Callable]='mae',
        verbose=True):
    """
    Evaluates a model under the artificial sampling protocol with a single error metric.

    :param model: the quantifier that produces the class prevalence estimations
    :param test: the test collection from which the samples are drawn
    :param sample_size: the size of each sample
    :param n_prevpoints: the number of different prevalences to sample
    :param n_repetitions: the number of repetitions for each prevalence
    :param n_jobs: number of jobs to be run in parallel
    :param random_seed: seed forwarded to `artificial_sampling_prediction`
    :param error_metric: an error name (resolved through `qp.error.from_name`) or a callable that receives
     the arrays of true and estimated prevalences of all samples at once
    :param verbose: if True, shows a progress bar
    :return: whatever the error metric returns when applied to the true and estimated prevalences
    """

    score_fn = qp.error.from_name(error_metric) if isinstance(error_metric, str) else error_metric

    assert hasattr(score_fn, '__call__'), 'invalid error function'

    true_prevs, estim_prevs = artificial_sampling_prediction(
        model,
        test,
        sample_size,
        n_prevpoints=n_prevpoints,
        n_repetitions=n_repetitions,
        n_jobs=n_jobs,
        random_seed=random_seed,
        verbose=verbose,
    )

    # the metric is applied to the full (samples x classes) arrays, not sample by sample
    return score_fn(true_prevs, estim_prevs)


def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):
    """
    Scores a model on a series of test samples and averages the resulting errors.

    :param model: the quantifier to evaluate
    :param test_samples: the samples on which the model is evaluated, one score per sample
    :param err: an error name (resolved through `qp.error.from_name`) or a callable taking
     (true prevalence, estimated prevalence)
    :param n_jobs: number of jobs handed to `qp.util.parallel`
    :return: the mean (`np.mean`) of the per-sample scores
    """
    error_fn = qp.error.from_name(err) if isinstance(err, str) else err
    # each job receives a single (model, sample, error) tuple, the shape _delayed_eval unpacks
    job_args = ((model, sample, error_fn) for sample in test_samples)
    per_sample_scores = qp.util.parallel(_delayed_eval, job_args, n_jobs=n_jobs)
    return np.mean(per_sample_scores)


def _delayed_eval(args):
    model, test, error = args
    prev_estim = model.quantify(test.instances)
    prev_true  = test.prevalence()
    return error(prev_true, prev_estim)
added Ensemble methods (methods ALL, ACC, Ptr, DS from Pérez-Gallego et al 2017 and 2019) and some UCI ML datasets used in those articles (only 5 datasets out of 32 they used) 2021-01-06 14:58:29 +01:00			`from typing import Union, Callable, Iterable`
import fixes 2021-01-15 18:32:32 +01:00
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`import numpy as np`
			`from joblib import Parallel, delayed`
			`from tqdm import tqdm`
import fixes 2021-01-15 18:32:32 +01:00
			`import quapy as qp`
			`from quapy.data import LabelledCollection`
			`from quapy.method.base import BaseQuantifier`
			`from quapy.util import temp_seed`
some refactor made in order to accomodate OneVsAll to operate with aggregative probabilistic quantifiers; launching OneVsAll(HDy) 2021-01-18 16:52:19 +01:00			`import quapy.functional as F`
all uci datasets from Pérez-Gállego added, quantification report added 2021-01-28 18:22:43 +01:00			`import pandas as pd`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00
			`def artificial_sampling_prediction(`
			`model: BaseQuantifier,`
			`test: LabelledCollection,`
			`sample_size,`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`n_prevpoints=210,`
			`n_repetitions=1,`
quapy fixed 2021-01-22 09:58:12 +01:00			`n_jobs=1,`
added model selection for quantification 2020-12-22 17:43:23 +01:00			`random_seed=42,`
			`verbose=True`
			`):`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`"""`
			`Performs the predictions for all samples generated according to the artificial sampling protocol.`
			`:param model: the model in charge of generating the class prevalence estimations`
			`:param test: the test set on which to perform arificial sampling`
			`:param sample_size: the size of the samples`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`:param n_prevpoints: the number of different prevalences to sample`
			`:param n_repetitions: the number of repetitions for each prevalence`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`:param n_jobs: number of jobs to be run in parallel`
			`:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect`
			`any other random process.`
added model selection for quantification 2020-12-22 17:43:23 +01:00			`:param verbose: if True, shows a progress bar`
plot functionality added 2021-01-07 17:58:48 +01:00			`:return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`number of classes. The first one contains the true prevalences for the samples generated while the second one`
plot functionality added 2021-01-07 17:58:48 +01:00			`contains the the prevalence estimations`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`"""`

			`with temp_seed(random_seed):`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))`

some refactor made in order to accomodate OneVsAll to operate with aggregative probabilistic quantifiers; launching OneVsAll(HDy) 2021-01-18 16:52:19 +01:00			`if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier):`
refactor of ensembles, launching EPACC with Ptr policy 2021-01-19 18:26:40 +01:00			`# print('\tinstance of aggregative-quantifier')`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`quantification_func = model.aggregate`
some refactor made in order to accomodate OneVsAll to operate with aggregative probabilistic quantifiers; launching OneVsAll(HDy) 2021-01-18 16:52:19 +01:00			`if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):`
refactor of ensembles, launching EPACC with Ptr policy 2021-01-19 18:26:40 +01:00			`# print('\t\tinstance of probabilitstic-aggregative-quantifier')`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`preclassified_instances = model.posterior_probabilities(test.instances)`
			`else:`
refactor of ensembles, launching EPACC with Ptr policy 2021-01-19 18:26:40 +01:00			`# print('\t\tinstance of hard-aggregative-quantifier')`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`preclassified_instances = model.classify(test.instances)`
			`test = LabelledCollection(preclassified_instances, test.labels)`
			`else:`
refactor of ensembles, launching EPACC with Ptr policy 2021-01-19 18:26:40 +01:00			`# print('\t\tinstance of base-quantifier')`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`quantification_func = model.quantify`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00
			`def _predict_prevalences(index):`
			`sample = test.sampling_from_index(index)`
			`true_prevalence = sample.prevalence()`
refactoring aggregative methods as methods that not only implement 'classify' and 'quantify', but that also implement 'aggregate' and that, by default, have a default implementation of 'quantify' as a pipeline of 'classify' and 'aggregate'; this helps speeding up evaluations A LOT, since the documents can be pre-classified and the samples are carried out across pre-classified values (labels, or posterior probabilities), and thus only aggregate is called many times within the artificial sampling protocol 2020-12-11 19:28:17 +01:00			`estim_prevalence = quantification_func(sample.instances)`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00			`return true_prevalence, estim_prevalence`

added model selection for quantification 2020-12-22 17:43:23 +01:00			`pbar = tqdm(indexes, desc='[artificial sampling protocol] predicting') if verbose else indexes`
parallel functionality added to quapy in order to allow for multiprocess parallelization (and not threading) handling quapy's environment variables 2021-01-27 09:54:41 +01:00			`results = qp.util.parallel(_predict_prevalences, pbar, n_jobs=n_jobs)`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00
			`true_prevalences, estim_prevalences = zip(*results)`
			`true_prevalences = np.asarray(true_prevalences)`
			`estim_prevalences = np.asarray(estim_prevalences)`

			`return true_prevalences, estim_prevalences`


all uci datasets from Pérez-Gállego added, quantification report added 2021-01-28 18:22:43 +01:00			`def artificial_sampling_report(`
			`model: BaseQuantifier,`
			`test: LabelledCollection,`
			`sample_size,`
			`n_prevpoints=210,`
			`n_repetitions=1,`
			`n_jobs=1,`
			`random_seed=42,`
			`error_metrics:Iterable[Union[str,Callable]]='mae',`
			`verbose=True):`

			`if isinstance(error_metrics, str):`
			`error_metrics=[error_metrics]`

			`error_names = [e if isinstance(e, str) else e.__name__ for e in error_metrics]`
			`error_funcs = [qp.error.from_name(e) if isinstance(e, str) else e for e in error_metrics]`
			`assert all(hasattr(e, '__call__') for e in error_funcs), 'invalid error functions'`

			`df = pd.DataFrame(columns=['true-prev', 'estim-prev']+error_names)`
			`true_prevs, estim_prevs = artificial_sampling_prediction(`
			`model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose`
			`)`
			`for true_prev, estim_prev in zip(true_prevs, estim_prevs):`
			`series = {'true-prev': true_prev, 'estim-prev': estim_prev}`
			`for error_name, error_metric in zip(error_names, error_funcs):`
			`score = error_metric(true_prev, estim_prev)`
			`series[error_name] = score`
			`df = df.append(series, ignore_index=True)`

			`return df`


			`def artificial_sampling_eval(`
			`model: BaseQuantifier,`
			`test: LabelledCollection,`
			`sample_size,`
			`n_prevpoints=210,`
			`n_repetitions=1,`
			`n_jobs=1,`
			`random_seed=42,`
			`error_metric:Union[str,Callable]='mae',`
			`verbose=True):`

			`if isinstance(error_metric, str):`
			`error_metric = qp.error.from_name(error_metric)`

			`assert hasattr(error_metric, '__call__'), 'invalid error function'`

			`true_prevs, estim_prevs = artificial_sampling_prediction(`
			`model, test, sample_size, n_prevpoints, n_repetitions, n_jobs, random_seed, verbose`
			`)`

			`return error_metric(true_prevs, estim_prevs)`


added Ensemble methods (methods ALL, ACC, Ptr, DS from Pérez-Gallego et al 2017 and 2019) and some UCI ML datasets used in those articles (only 5 datasets out of 32 they used) 2021-01-06 14:58:29 +01:00			`def evaluate(model: BaseQuantifier, test_samples:Iterable[LabelledCollection], err:Union[str, Callable], n_jobs:int=-1):`
			`if isinstance(err, str):`
parallel functionality added to quapy in order to allow for multiprocess parallelization (and not threading) handling quapy's environment variables 2021-01-27 09:54:41 +01:00			`err = qp.error.from_name(err)`
			`scores = qp.util.parallel(_delayed_eval, ((model, Ti, err) for Ti in test_samples), n_jobs=n_jobs)`
added Ensemble methods (methods ALL, ACC, Ptr, DS from Pérez-Gallego et al 2017 and 2019) and some UCI ML datasets used in those articles (only 5 datasets out of 32 they used) 2021-01-06 14:58:29 +01:00			`return np.mean(scores)`

evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00
parallel functionality added to quapy in order to allow for multiprocess parallelization (and not threading) handling quapy's environment variables 2021-01-27 09:54:41 +01:00			`def _delayed_eval(args):`
			`model, test, error = args`
added Ensemble methods (methods ALL, ACC, Ptr, DS from Pérez-Gallego et al 2017 and 2019) and some UCI ML datasets used in those articles (only 5 datasets out of 32 they used) 2021-01-06 14:58:29 +01:00			`prev_estim = model.quantify(test.instances)`
			`prev_true = test.prevalence()`
			`return error(prev_true, prev_estim)`
evaluation by artificial prevalence sampling added. New methods added. New util functions added to quapy.functional and quapy.utils 2020-12-10 19:04:33 +01:00