Add an N_JOBS environment variable, and add a default classifier (sklearn's LogisticRegression) for aggregative quantifiers instantiated without a classifier
parent 9ad36ef008
commit ad11b86168
@@ -1,10 +1,26 @@
 Change Log 0.1.9
 ----------------

 - [TODO] add LeQua2024
-- [TODO] add njobs to env
-- [TODO] add basic examples
-- [TODO] add default classifier to env
-- [TODO] add default classifier to env
+
+- Added a default classifier for aggregative quantifiers, which can now be instantiated without specifying
+  the classifier. The default classifier can be accessed in qp.environ['DEFAULT_CLS'] and is assigned to
+  sklearn.linear_model.LogisticRegression(max_iter=3000). If the classifier is not specified, then a clone
+  of said classifier is returned. E.g.:
+  > pacc = PACC()
+  is equivalent to:
+  > pacc = PACC(classifier=LogisticRegression(max_iter=3000))
+
+- Improved error logging in model selection. In v0.1.8 only Status.INVALID was reported; in v0.1.9 it is
+  now accompanied by a textual description of the error
+
+- The number of parallel workers can now be set via an environment variable by running, e.g.:
+  > N_JOBS=10 python3 your_script.py
+  which has the same effect as writing the following code at the beginning of your_script.py:
+  > import quapy as qp
+  > qp.environ["N_JOBS"] = 10
+
+- Some examples have been added to the ./examples/ dir, which now contains numbered examples from basics (0)
+  to advanced topics (higher numbers)

 - Moved the wiki documents to the ./docs/ folder so that they become editable via PR for the community
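Since every aggregative quantifier now clones qp.environ['DEFAULT_CLS'] when no classifier is given, the default can also be swapped globally before instantiating any method. A minimal sketch (the RandomForest choice is purely illustrative; any sklearn estimator with predict_proba works for soft quantifiers):

    import quapy as qp
    from sklearn.ensemble import RandomForestClassifier

    # replace the project-wide default; quantifiers built afterwards clone this estimator
    qp.environ['DEFAULT_CLS'] = RandomForestClassifier(n_estimators=100)

    pacc = qp.method.aggregative.PACC()  # now backed by a clone of the RandomForest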
@@ -33,9 +33,10 @@ import quapy.functional as F  # <- this module has some functional utilities, li
 print(f'training prevalence = {F.strprev(train.prevalence())}')

 # let us train one quantifier, for example, PACC using a sklearn's Logistic Regressor as the underlying classifier
-classifier = LogisticRegression()
+# classifier = LogisticRegression()

-pacc = qp.method.aggregative.PACC(classifier)
+# pacc = qp.method.aggregative.PACC(classifier)
+pacc = qp.method.aggregative.PACC()

 print(f'training {pacc}')
 pacc.fit(train)
@@ -1,10 +1,7 @@
 import quapy as qp
-from method._kdey import KDEyML
-from quapy.method.non_aggregative import DMx
-from quapy.protocol import APP, UPP
+from quapy.protocol import UPP
 from quapy.method.aggregative import DMy
 from sklearn.linear_model import LogisticRegression
-from examples.comparing_gridsearch import OLD_GridSearchQ
 import numpy as np
 from time import time

@@ -12,10 +9,15 @@ from time import time
 In this example, we show how to perform model selection on a DistributionMatching quantifier.
 """

-model = DMy(LogisticRegression())
+model = DMy()

 qp.environ['SAMPLE_SIZE'] = 100
-qp.environ['N_JOBS'] = -1
+
+print(f'running model selection with N_JOBS={qp.environ["N_JOBS"]}; '
+      f'to increase the number of jobs use:\n> N_JOBS=-1 python3 1.model_selection.py\n'
+      f'alternatively, you can set this variable within the script as:\n'
+      f'import quapy as qp\n'
+      f'qp.environ["N_JOBS"]=-1')

 training, test = qp.datasets.fetch_UCIMulticlassDataset('letter').train_test

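As a usage sketch for the snippet above (assuming the script is saved as 1.model_selection.py, the name used in its own message):

> N_JOBS=10 python3 1.model_selection.py

is equivalent to setting the value programmatically before the grid search starts:

> import quapy as qp
> qp.environ['N_JOBS'] = 10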
@@ -42,7 +44,7 @@ with qp.util.temp_seed(0):
     # different configurations of the quantifier. In other words, quapy avoids to train
     # the classifier 7x7 times.
     param_grid = {
-        'classifier__C': np.logspace(-3,3,7),
+        'classifier__C': np.logspace(-3, 3, 7),
         'nbins': [2, 3, 4, 5, 10, 15, 20]
     }

@@ -7,7 +7,7 @@ import numpy as np
 from sklearn.linear_model import LogisticRegression

 import quapy as qp
-from quapy.method.aggregative import PACC, EMQ, KDEyML
+from quapy.method.aggregative import PACC, EMQ
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import UPP
 from pathlib import Path
@@ -52,6 +52,7 @@ def load_timings(result_path):
     df = pd.read_csv(result_path+'.csv', sep='\t')
     return timings | df.pivot_table(index='Dataset', columns='Method', values='t_train').to_dict()


 if __name__ == '__main__':
+
     qp.environ['SAMPLE_SIZE'] = 500
@@ -1,15 +1,18 @@
 """QuaPy module for quantification"""

+from sklearn.linear_model import LogisticRegression
+
 from quapy.data import datasets
 from . import error
 from . import data
 from . import functional
-# from . import method
+from . import method
 from . import evaluation
 from . import protocol
 from . import plot
 from . import util
 from . import model_selection
 from . import classification
+import os

 __version__ = '0.1.9'

@@ -20,7 +23,8 @@ environ = {
     'PAD_TOKEN': '[PAD]',
     'PAD_INDEX': 1,
     'SVMPERF_HOME': './svm_perf_quantification',
-    'N_JOBS': 1
+    'N_JOBS': int(os.getenv('N_JOBS', 1)),
+    'DEFAULT_CLS': LogisticRegression(max_iter=3000)
 }

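Note that this dictionary literal runs once, at import time, so the environment variable must be exported before quapy is first imported; a minimal sketch of the resulting behaviour:

    import os
    os.environ['N_JOBS'] = '8'   # must precede the first `import quapy`

    import quapy as qp
    print(qp.environ['N_JOBS'])  # -> 8, parsed by int(os.getenv('N_JOBS', 1))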
@@ -48,3 +52,19 @@ def _get_sample_size(sample_size):
     if sample_size is None:
         raise ValueError('neither sample_size nor qp.environ["SAMPLE_SIZE"] have been specified')
     return sample_size
+
+
+def _get_classifier(classifier):
+    """
+    If `classifier` is None, then it returns a clone of `environ['DEFAULT_CLS']`;
+    otherwise, it returns `classifier`.
+
+    :param classifier: sklearn's estimator or None
+    :return: sklearn's estimator
+    """
+    if classifier is None:
+        from sklearn.base import clone
+        classifier = clone(environ['DEFAULT_CLS'])
+    if classifier is None:
+        raise ValueError('neither classifier nor qp.environ["DEFAULT_CLS"] have been specified')
+    return classifier
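A quick sketch of how the new helper behaves, following its definition above:

    import quapy as qp
    from sklearn.linear_model import LogisticRegression

    clf = qp._get_classifier(None)                  # a clone of qp.environ['DEFAULT_CLS']
    clf = qp._get_classifier(LogisticRegression())  # returned unchanged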
@@ -24,12 +24,14 @@ class KDEBase:
         Checks that the bandwidth parameter is correct

         :param bandwidth: either a string (see BANDWIDTH_METHOD) or a float
-        :return: nothing, but raises an exception for invalid values
+        :return: the bandwidth if the check is passed, or raises an exception for invalid values
         """
         assert bandwidth in KDEBase.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
             f'invalid bandwidth, valid ones are {KDEBase.BANDWIDTH_METHOD} or float values'
         if isinstance(bandwidth, float):
-            assert 0 < bandwidth < 1, "the bandwith for KDEy should be in (0,1), since this method models the unit simplex"
+            assert 0 < bandwidth < 1, \
+                "the bandwidth for KDEy should be in (0,1), since this method models the unit simplex"
+        return bandwidth

     def get_kde_function(self, X, bandwidth):
         """
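_check_bandwidth now validates and returns its argument, so it can be used inline in assignments. A sketch ('scott' is only an assumed member of BANDWIDTH_METHOD):

    bw = KDEBase._check_bandwidth(0.1)      # returns 0.1
    bw = KDEBase._check_bandwidth('scott')  # ok if 'scott' is in KDEBase.BANDWIDTH_METHOD
    KDEBase._check_bandwidth(1.5)           # AssertionError: bandwidth must lie in (0,1)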
@@ -106,16 +108,13 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         Alternatively, this set can be specified at fit time by indicating the exact set of data
         on which the predictions are to be generated.
     :param bandwidth: float, the bandwidth of the Kernel
-    :param n_jobs: number of parallel workers
     :param random_state: a seed to be set before fitting any base quantifier (default None)
     """

-    def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None, random_state=None):
-        self._check_bandwidth(bandwidth)
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5, bandwidth=0.1, random_state=None):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
-        self.bandwidth = bandwidth
-        self.n_jobs = n_jobs
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
         self.random_state=random_state

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
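With the new defaults, the KDEy quantifiers can be built with no arguments; a sketch:

    from quapy.method.aggregative import KDEyML

    kde = KDEyML()                # a clone of qp.environ['DEFAULT_CLS'], val_split=5, bandwidth=0.1
    kde = KDEyML(bandwidth=0.05)  # the bandwidth is validated (and returned) by _check_bandwidth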
@@ -130,17 +129,17 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         :param posteriors: instances in the sample converted into posterior probabilities
         :return: a vector of class prevalence estimates
         """
-        np.random.RandomState(self.random_state)
-        epsilon = 1e-10
-        n_classes = len(self.mix_densities)
-        test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
-
-        def neg_loglikelihood(prev):
-            test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
-            test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
-            return -np.sum(test_loglikelihood)
-
-        return F.optim_minimize(neg_loglikelihood, n_classes)
+        with qp.util.temp_seed(self.random_state):
+            epsilon = 1e-10
+            n_classes = len(self.mix_densities)
+            test_densities = [self.pdf(kde_i, posteriors) for kde_i in self.mix_densities]
+
+            def neg_loglikelihood(prev):
+                test_mixture_likelihood = sum(prev_i * dens_i for prev_i, dens_i in zip(prev, test_densities))
+                test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                return -np.sum(test_loglikelihood)
+
+            return F.optim_minimize(neg_loglikelihood, n_classes)


 class KDEyHD(AggregativeSoftQuantifier, KDEBase):
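The replaced line np.random.RandomState(self.random_state) built a generator and immediately discarded it, so the seed never took effect; qp.util.temp_seed seeds the global generator for the duration of the block instead. A sketch of the intended contract (restoration of the previous state on exit is assumed from the name):

    import numpy as np
    import quapy as qp

    with qp.util.temp_seed(0):
        a = np.random.rand(3)
    with qp.util.temp_seed(0):
        b = np.random.rand(3)

    assert np.allclose(a, b)  # same seed, same draws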
@@ -183,20 +182,17 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
         Alternatively, this set can be specified at fit time by indicating the exact set of data
         on which the predictions are to be generated.
     :param bandwidth: float, the bandwidth of the Kernel
-    :param n_jobs: number of parallel workers
     :param random_state: a seed to be set before fitting any base quantifier (default None)
     :param montecarlo_trials: number of Monte Carlo trials (default 10000)
     """

-    def __init__(self, classifier: BaseEstimator, val_split=10, divergence: str='HD',
-                 bandwidth=0.1, n_jobs=None, random_state=None, montecarlo_trials=10000):
-
-        self._check_bandwidth(bandwidth)
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5, divergence: str='HD',
+                 bandwidth=0.1, random_state=None, montecarlo_trials=10000):
+
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.divergence = divergence
-        self.bandwidth = bandwidth
-        self.n_jobs = n_jobs
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth)
         self.random_state=random_state
         self.montecarlo_trials = montecarlo_trials

@@ -278,15 +274,12 @@ class KDEyCS(AggregativeSoftQuantifier):
         Alternatively, this set can be specified at fit time by indicating the exact set of data
         on which the predictions are to be generated.
     :param bandwidth: float, the bandwidth of the Kernel
-    :param n_jobs: number of parallel workers
     """

-    def __init__(self, classifier: BaseEstimator, val_split=10, bandwidth=0.1, n_jobs=None):
-        KDEBase._check_bandwidth(bandwidth)
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5, bandwidth=0.1):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
-        self.bandwidth = bandwidth
-        self.n_jobs = n_jobs
+        self.bandwidth = KDEBase._check_bandwidth(bandwidth)

     def gram_matrix_mix_sum(self, X, Y=None):
         # this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))
@@ -355,7 +348,7 @@ class KDEyCS(AggregativeSoftQuantifier):
         # called \overline{r} in the paper
         alpha_ratio = alpha * self.counts_inv

-        # recal that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)
+        # recall that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)
         partA = -np.log((alpha_ratio @ tr_te_sums) * Minv)
         partB = 0.5 * np.log(alpha_ratio @ tr_tr_sums @ alpha_ratio)
         return partA + partB  #+ partC
@@ -27,8 +27,8 @@ class ThresholdOptimization(BinaryAggregativeQuantifier):
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

-    def __init__(self, classifier: BaseEstimator, val_split=None, n_jobs=None):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=None, n_jobs=None):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.n_jobs = qp._get_njobs(n_jobs)

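The threshold-based variants below inherit this default, so they too become instantiable without arguments; a sketch (assuming T50 remains exposed through quapy.method.aggregative):

    from quapy.method.aggregative import T50

    t50 = T50()  # classifier defaults to a clone of qp.environ['DEFAULT_CLS']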
@@ -143,7 +143,7 @@ class T50(ThresholdOptimization):
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5):
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
         super().__init__(classifier, val_split)

     def condition(self, tpr, fpr) -> float:

@@ -167,7 +167,7 @@ class MAX(ThresholdOptimization):
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5):
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
         super().__init__(classifier, val_split)

     def condition(self, tpr, fpr) -> float:

@@ -192,7 +192,7 @@ class X(ThresholdOptimization):
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5):
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
         super().__init__(classifier, val_split)

     def condition(self, tpr, fpr) -> float:

@@ -215,7 +215,7 @@ class MS(ThresholdOptimization):
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """
-    def __init__(self, classifier: BaseEstimator, val_split=5):
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
         super().__init__(classifier, val_split)

     def condition(self, tpr, fpr) -> float:

@@ -254,7 +254,7 @@ class MS2(MS):
        `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a
        :class:`quapy.data.base.LabelledCollection` (the split itself).
    """
-    def __init__(self, classifier: BaseEstimator, val_split=5):
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
         super().__init__(classifier, val_split)

     def discard(self, tpr, fpr) -> bool:
@@ -3,7 +3,6 @@ from copy import deepcopy
 from typing import Callable, Literal, Union
 import numpy as np
 from abstention.calibration import NoBiasVectorScaling, TempScaling, VectorScaling
-from scipy import optimize
 from sklearn.base import BaseEstimator
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.metrics import confusion_matrix

@@ -12,7 +11,6 @@ from sklearn.model_selection import cross_val_predict
 import quapy as qp
 import quapy.functional as F
 from quapy.functional import get_divergence
-from quapy.classification.calibration import NBVSCalibration, BCTSCalibration, TSCalibration, VSCalibration
 from quapy.classification.svmperf import SVMperf
 from quapy.data import LabelledCollection
 from quapy.method.base import BaseQuantifier, BinaryQuantifier, OneVsAllGeneric
@@ -343,8 +341,8 @@ class CC(AggregativeCrispQuantifier):
    :param classifier: a sklearn's Estimator that generates a classifier
    """

-    def __init__(self, classifier: BaseEstimator):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None):
+        self.classifier = qp._get_classifier(classifier)

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """

@@ -373,8 +371,8 @@ class PCC(AggregativeSoftQuantifier):
    :param classifier: a sklearn's Estimator that generates a classifier
    """

-    def __init__(self, classifier: BaseEstimator):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None):
+        self.classifier = qp._get_classifier(classifier)

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
         """
@@ -436,14 +434,14 @@ class ACC(AggregativeCrispQuantifier):
    """
    def __init__(
            self,
-            classifier: BaseEstimator,
+            classifier: BaseEstimator=None,
            val_split=5,
            solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize',
            method: Literal['inversion', 'invariant-ratio'] = 'inversion',
            norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip',
            n_jobs=None,
    ):
-        self.classifier = classifier
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.n_jobs = qp._get_njobs(n_jobs)
         self.solver = solver

@@ -571,14 +569,14 @@ class PACC(AggregativeSoftQuantifier):
    """
    def __init__(
            self,
-            classifier: BaseEstimator,
+            classifier: BaseEstimator=None,
            val_split=5,
            solver: Literal['minimize', 'exact', 'exact-raise', 'exact-cc'] = 'minimize',
            method: Literal['inversion', 'invariant-ratio'] = 'inversion',
            norm: Literal['clip', 'mapsimplex', 'condsoftmax'] = 'clip',
            n_jobs=None
    ):
-        self.classifier = classifier
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.n_jobs = qp._get_njobs(n_jobs)
         self.solver = solver
@@ -668,8 +666,8 @@ class EMQ(AggregativeSoftQuantifier):
     MAX_ITER = 1000
     EPSILON = 1e-4

-    def __init__(self, classifier: BaseEstimator, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=None, exact_train_prev=True, recalib=None, n_jobs=None):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.exact_train_prev = exact_train_prev
         self.recalib = recalib
@@ -832,7 +830,7 @@ class BayesianCC(AggregativeCrispQuantifier):
    :param mcmc_seed: random seed for the MCMC sampler (default 0)
    """
    def __init__(self,
-                 classifier: BaseEstimator,
+                 classifier: BaseEstimator=None,
                 val_split: float = 0.75,
                 num_warmup: int = 500,
                 num_samples: int = 1_000,

@@ -849,7 +847,7 @@ class BayesianCC(AggregativeCrispQuantifier):
        if _bayesian.DEPENDENCIES_INSTALLED is False:
            raise ImportError("Auxiliary dependencies are required. Run `$ pip install quapy[bayes]` to install them.")

-        self.classifier = classifier
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.num_warmup = num_warmup
         self.num_samples = num_samples
@@ -919,8 +917,8 @@ class HDy(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
        validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5)..
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

@@ -995,8 +993,8 @@ class DyS(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
    :param n_jobs: number of parallel workers.
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5, n_bins=8, divergence: Union[str, Callable]='HD', tol=1e-05, n_jobs=None):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5, n_bins=8, divergence: Union[str, Callable]='HD', tol=1e-05, n_jobs=None):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.tol = tol
         self.divergence = divergence

@@ -1060,8 +1058,8 @@ class SMM(AggregativeSoftQuantifier, BinaryAggregativeQuantifier):
        validation distribution, or a :class:`quapy.data.base.LabelledCollection` (the split itself), or an integer indicating the number of folds (default 5)..
    """

-    def __init__(self, classifier: BaseEstimator, val_split=5):
-        self.classifier = classifier
+    def __init__(self, classifier: BaseEstimator=None, val_split=5):
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):

@@ -1109,9 +1107,9 @@ class DMy(AggregativeSoftQuantifier):
    :param n_jobs: number of parallel workers (default None)
    """

-    def __init__(self, classifier, val_split=5, nbins=8, divergence: Union[str, Callable]='HD',
+    def __init__(self, classifier: BaseEstimator=None, val_split=5, nbins=8, divergence: Union[str, Callable]='HD',
                 cdf=False, search='optim_minimize', n_jobs=None):
-        self.classifier = classifier
+        self.classifier = qp._get_classifier(classifier)
         self.val_split = val_split
         self.nbins = nbins
         self.divergence = divergence
@@ -328,7 +328,7 @@ class GridSearchQ(BaseQuantifier):
             if self.raise_errors:
                 raise exception
             else:
-                return ConfigStatus(params, status)
+                return ConfigStatus(params, status, msg=str(exception))

         try:
             with timeout(self.timeout):
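This is the change behind the improved error logging noted in the changelog: a failed configuration now carries the exception text alongside its status. Illustratively (the params dict and the Status member are hypothetical):

    # v0.1.8: only the status was recorded
    #   return ConfigStatus(params, status)
    # v0.1.9: the error message travels with it, e.g.
    #   ConfigStatus({'classifier__C': 1.0, 'nbins': 8}, Status.ERROR, msg="ValueError: ...")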