forked from moreo/QuaPy
Merge branch 'master' of github.com:HLT-ISTI/QuaPy
This commit is contained in:
commit
6a5c528154
|
@ -215,7 +215,7 @@ def __check_eps(eps=None):
|
||||||
|
|
||||||
|
|
||||||
CLASSIFICATION_ERROR = {f1e, acce}
|
CLASSIFICATION_ERROR = {f1e, acce}
|
||||||
QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld}
|
QUANTIFICATION_ERROR = {mae, mrae, mse, mkld, mnkld, ae, rae, se, kld, nkld}
|
||||||
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, mkld, mnkld, mrae}
|
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, mkld, mnkld, mrae}
|
||||||
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
|
||||||
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
|
||||||
|
|
|
@ -6,7 +6,7 @@ import inspect
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from quapy.data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
from quapy.method.base import BaseQuantifier
|
from quapy.method.base import BaseQuantifier
|
||||||
from quapy.util import temp_seed
|
from quapy.util import temp_seed, _check_sample_size
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
@ -14,9 +14,9 @@ import pandas as pd
|
||||||
def artificial_prevalence_prediction(
|
def artificial_prevalence_prediction(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
n_prevpoints=101,
|
n_prevpoints=101,
|
||||||
repeats=1,
|
n_repetitions=1,
|
||||||
eval_budget: int = None,
|
eval_budget: int = None,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
|
@ -31,10 +31,11 @@ def artificial_prevalence_prediction(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform APP
|
:param test: the test set on which to perform APP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
||||||
is specified; default 101, i.e., steps of 1%)
|
is specified; default 101, i.e., steps of 1%)
|
||||||
:param repeats: integer, the number of repetitions for each prevalence (default 1)
|
:param n_repetitions: integer, the number of repetitions for each prevalence (default 1)
|
||||||
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
|
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
|
||||||
there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
|
there are 3 classes, `n_repetitions=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
|
||||||
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
|
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
|
||||||
|
@ -48,10 +49,11 @@ def artificial_prevalence_prediction(
|
||||||
for the samples generated while the second one contains the prevalence estimations
|
for the samples generated while the second one contains the prevalence estimations
|
||||||
"""
|
"""
|
||||||
|
|
||||||
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, repeats, verbose)
|
sample_size = _check_sample_size(sample_size)
|
||||||
|
n_prevpoints, _ = qp.evaluation._check_num_evals(test.n_classes, n_prevpoints, eval_budget, n_repetitions, verbose)
|
||||||
|
|
||||||
with temp_seed(random_seed):
|
with temp_seed(random_seed):
|
||||||
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, repeats))
|
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
|
||||||
|
|
||||||
return _predict_from_indexes(indexes, model, test, n_jobs, verbose)
|
return _predict_from_indexes(indexes, model, test, n_jobs, verbose)
|
||||||
|
|
||||||
|
@ -59,8 +61,8 @@ def artificial_prevalence_prediction(
|
||||||
def natural_prevalence_prediction(
|
def natural_prevalence_prediction(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
repeats,
|
repeats=100,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
verbose=False):
|
verbose=False):
|
||||||
|
@ -71,8 +73,9 @@ def natural_prevalence_prediction(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform NPP
|
:param test: the test set on which to perform NPP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
:param repeats: integer, the number of samples to generate
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
|
:param repeats: integer, the number of samples to generate (default 100)
|
||||||
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
||||||
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
||||||
any other random process (default 42)
|
any other random process (default 42)
|
||||||
|
@ -82,6 +85,7 @@ def natural_prevalence_prediction(
|
||||||
for the samples generated while the second one contains the prevalence estimations
|
for the samples generated while the second one contains the prevalence estimations
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
sample_size = _check_sample_size(sample_size)
|
||||||
with temp_seed(random_seed):
|
with temp_seed(random_seed):
|
||||||
indexes = list(test.natural_sampling_index_generator(sample_size, repeats))
|
indexes = list(test.natural_sampling_index_generator(sample_size, repeats))
|
||||||
|
|
||||||
|
@ -162,9 +166,9 @@ def _predict_from_indexes(
|
||||||
def artificial_prevalence_report(
|
def artificial_prevalence_report(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
n_prevpoints=101,
|
n_prevpoints=101,
|
||||||
repeats=1,
|
n_repetitions=1,
|
||||||
eval_budget: int = None,
|
eval_budget: int = None,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
|
@ -184,10 +188,11 @@ def artificial_prevalence_report(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform APP
|
:param test: the test set on which to perform APP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
||||||
is specified; default 101, i.e., steps of 1%)
|
is specified; default 101, i.e., steps of 1%)
|
||||||
:param repeats: integer, the number of repetitions for each prevalence (default 1)
|
:param n_repetitions: integer, the number of repetitions for each prevalence (default 1)
|
||||||
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
|
:param eval_budget: integer, if specified, sets a ceil on the number of evaluations to perform. For example, if
|
||||||
there are 3 classes, `repeats=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
|
there are 3 classes, `n_repetitions=1`, and `eval_budget=20`, then `n_prevpoints` will be set to 5, since this
|
||||||
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
|
will generate 15 different prevalence vectors ([0, 0, 1], [0, 0.25, 0.75], [0, 0.5, 0.5] ... [1, 0, 0]) and
|
||||||
|
@ -205,7 +210,7 @@ def artificial_prevalence_report(
|
||||||
"""
|
"""
|
||||||
|
|
||||||
true_prevs, estim_prevs = artificial_prevalence_prediction(
|
true_prevs, estim_prevs = artificial_prevalence_prediction(
|
||||||
model, test, sample_size, n_prevpoints, repeats, eval_budget, n_jobs, random_seed, verbose
|
model, test, sample_size, n_prevpoints, n_repetitions, eval_budget, n_jobs, random_seed, verbose
|
||||||
)
|
)
|
||||||
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
|
return _prevalence_report(true_prevs, estim_prevs, error_metrics)
|
||||||
|
|
||||||
|
@ -213,8 +218,8 @@ def artificial_prevalence_report(
|
||||||
def natural_prevalence_report(
|
def natural_prevalence_report(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
repeats=1,
|
repeats=100,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
error_metrics:Iterable[Union[str,Callable]]='mae',
|
error_metrics:Iterable[Union[str,Callable]]='mae',
|
||||||
|
@ -230,8 +235,9 @@ def natural_prevalence_report(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform NPP
|
:param test: the test set on which to perform NPP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
:param repeats: integer, the number of samples to generate
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
|
:param repeats: integer, the number of samples to generate (default 100)
|
||||||
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
||||||
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
||||||
any other random process (default 42)
|
any other random process (default 42)
|
||||||
|
@ -244,7 +250,7 @@ def natural_prevalence_report(
|
||||||
for the samples generated while the second one contains the prevalence estimations
|
for the samples generated while the second one contains the prevalence estimations
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
sample_size = _check_sample_size(sample_size)
|
||||||
true_prevs, estim_prevs = natural_prevalence_prediction(
|
true_prevs, estim_prevs = natural_prevalence_prediction(
|
||||||
model, test, sample_size, repeats, n_jobs, random_seed, verbose
|
model, test, sample_size, repeats, n_jobs, random_seed, verbose
|
||||||
)
|
)
|
||||||
|
@ -300,7 +306,7 @@ def _prevalence_report(
|
||||||
def artificial_prevalence_protocol(
|
def artificial_prevalence_protocol(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
n_prevpoints=101,
|
n_prevpoints=101,
|
||||||
repeats=1,
|
repeats=1,
|
||||||
eval_budget: int = None,
|
eval_budget: int = None,
|
||||||
|
@ -318,7 +324,8 @@ def artificial_prevalence_protocol(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform APP
|
:param test: the test set on which to perform APP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
:param n_prevpoints: integer, the number of different prevalences to sample (or set to None if eval_budget
|
||||||
is specified; default 101, i.e., steps of 1%)
|
is specified; default 101, i.e., steps of 1%)
|
||||||
:param repeats: integer, the number of repetitions for each prevalence (default 1)
|
:param repeats: integer, the number of repetitions for each prevalence (default 1)
|
||||||
|
@ -350,8 +357,8 @@ def artificial_prevalence_protocol(
|
||||||
def natural_prevalence_protocol(
|
def natural_prevalence_protocol(
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
test: LabelledCollection,
|
test: LabelledCollection,
|
||||||
sample_size,
|
sample_size=None,
|
||||||
repeats=1,
|
repeats=100,
|
||||||
n_jobs=1,
|
n_jobs=1,
|
||||||
random_seed=42,
|
random_seed=42,
|
||||||
error_metric:Union[str,Callable]='mae',
|
error_metric:Union[str,Callable]='mae',
|
||||||
|
@ -363,7 +370,8 @@ def natural_prevalence_protocol(
|
||||||
|
|
||||||
:param model: the model in charge of generating the class prevalence estimations
|
:param model: the model in charge of generating the class prevalence estimations
|
||||||
:param test: the test set on which to perform NPP
|
:param test: the test set on which to perform NPP
|
||||||
:param sample_size: integer, the size of the samples
|
:param sample_size: integer, the size of the samples; if None, then the sample size is
|
||||||
|
taken from qp.environ['SAMPLE_SIZE']
|
||||||
:param repeats: integer, the number of samples to generate
|
:param repeats: integer, the number of samples to generate (default 100)
|
||||||
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
:param n_jobs: integer, number of jobs to be run in parallel (default 1)
|
||||||
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
|
||||||
|
|
|
@ -11,6 +11,8 @@ from quapy.evaluation import artificial_prevalence_prediction, natural_prevalenc
|
||||||
from quapy.method.aggregative import BaseQuantifier
|
from quapy.method.aggregative import BaseQuantifier
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
|
from quapy.util import _check_sample_size
|
||||||
|
|
||||||
|
|
||||||
class GridSearchQ(BaseQuantifier):
|
class GridSearchQ(BaseQuantifier):
|
||||||
"""Grid Search optimization targeting a quantification-oriented metric.
|
"""Grid Search optimization targeting a quantification-oriented metric.
|
||||||
|
@ -57,7 +59,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
model: BaseQuantifier,
|
model: BaseQuantifier,
|
||||||
param_grid: dict,
|
param_grid: dict,
|
||||||
sample_size: Union[int, None],
|
sample_size: Union[int, None] = None,
|
||||||
protocol='app',
|
protocol='app',
|
||||||
n_prevpoints: int = None,
|
n_prevpoints: int = None,
|
||||||
n_repetitions: int = 1,
|
n_repetitions: int = 1,
|
||||||
|
@ -105,7 +107,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
return training, validation
|
return training, validation
|
||||||
elif isinstance(validation, float):
|
elif isinstance(validation, float):
|
||||||
assert 0. < validation < 1., 'validation proportion should be in (0,1)'
|
assert 0. < validation < 1., 'validation proportion should be in (0,1)'
|
||||||
training, validation = training.split_stratified(train_prop=1 - validation)
|
training, validation = training.split_stratified(train_prop=1 - validation, random_state=self.random_seed)
|
||||||
return training, validation
|
return training, validation
|
||||||
elif self.protocol=='gen' and inspect.isgenerator(validation()):
|
elif self.protocol=='gen' and inspect.isgenerator(validation()):
|
||||||
return training, validation
|
return training, validation
|
||||||
|
@ -163,7 +165,7 @@ class GridSearchQ(BaseQuantifier):
|
||||||
val_split = self.val_split
|
val_split = self.val_split
|
||||||
training, val_split = self.__check_training_validation(training, val_split)
|
training, val_split = self.__check_training_validation(training, val_split)
|
||||||
if self.protocol != 'gen':
|
if self.protocol != 'gen':
|
||||||
assert isinstance(self.sample_size, int) and self.sample_size > 0, 'sample_size must be a positive integer'
|
self.sample_size = _check_sample_size(self.sample_size)
|
||||||
|
|
||||||
params_keys = list(self.param_grid.keys())
|
params_keys = list(self.param_grid.keys())
|
||||||
params_values = list(self.param_grid.values())
|
params_values = list(self.param_grid.values())
|
||||||
|
|
|
@ -176,6 +176,16 @@ def pickled_resource(pickle_path:str, generation_func:callable, *args):
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
|
|
||||||
|
def _check_sample_size(sample_size):
|
||||||
|
if sample_size is None:
|
||||||
|
assert qp.environ['SAMPLE_SIZE'] is not None, \
|
||||||
|
'error: sample_size set to None, and cannot be resolved from the environment'
|
||||||
|
sample_size = qp.environ['SAMPLE_SIZE']
|
||||||
|
assert isinstance(sample_size, int) and sample_size > 0, \
|
||||||
|
'error: sample_size is not a positive integer'
|
||||||
|
return sample_size
|
||||||
|
|
||||||
|
|
||||||
class EarlyStop:
|
class EarlyStop:
|
||||||
"""
|
"""
|
||||||
A class implementing the early-stopping condition typically used for training neural networks.
|
A class implementing the early-stopping condition typically used for training neural networks.
|
||||||
|
|
Loading…
Reference in New Issue