distribution matching multiclass
This commit is contained in: parent 4a7ffe5b50 · commit 26de9d92eb
@ -0,0 +1,58 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
import os
import sys
import pandas as pd

import quapy as qp
from quapy.method.aggregative import DistributionMatching
from method_kdey import KDEy
from quapy.model_selection import GridSearchQ


if __name__ == '__main__':

    qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
    qp.environ['N_JOBS'] = -1
    method = 'KDE'
    param = 0.1
    div = 'topsoe'
    method_identifier = f'{method}_modsel_{div}'

    os.makedirs('results_LequaT2B', exist_ok=True)
    result_path = f'results_LequaT2B/{method_identifier}.csv'

    # if os.path.exists(result_path):
    #     print('Result already exists. Nothing to do')
    #     sys.exit(0)

    with open(result_path, 'wt') as csv:
        csv.write('Method\tDataset\tMAE\tMRAE\n')

        dataset = 'T1B'
        train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)

        if method == 'KDE':
            param_grid = {'bandwidth': np.linspace(0.001, 0.1, 11)}
            model = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn')
        else:
            raise NotImplementedError('unknown method')

        modsel = GridSearchQ(model, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1)

        modsel.fit(train)
        print(f'best params {modsel.best_params_}')

        quantifier = modsel.best_model()

        report = qp.evaluation.evaluation_report(quantifier, protocol=test_gen, error_metrics=['mae', 'mrae'], verbose=True)
        means = report.mean()
        csv.write(f'{method}\tLeQua-{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
        csv.flush()

    df = pd.read_csv(result_path, sep='\t')

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)
@ -0,0 +1,66 @@
import numpy as np
from sklearn.linear_model import LogisticRegression
import os
import sys
import pandas as pd

import quapy as qp
from quapy.method.aggregative import DistributionMatching
from method_kdey import KDEy
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP


if __name__ == '__main__':

    qp.environ['SAMPLE_SIZE'] = 100
    qp.environ['N_JOBS'] = -1
    method = 'KDE'
    param = 0.1
    target = 'max_likelihood'
    div = 'topsoe'
    method_identifier = f'{method}_modsel_{div if target == "min_divergence" else target}'

    os.makedirs('results', exist_ok=True)
    result_path = f'results/{method_identifier}.csv'

    # if os.path.exists(result_path):
    #     print('Result already exists. Nothing to do')
    #     sys.exit(0)

    with open(result_path, 'wt') as csv:
        csv.write('Method\tDataset\tMAE\tMRAE\n')

        for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
            print('init', dataset)

            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=True)

            if method == 'KDE':
                param_grid = {'bandwidth': np.linspace(0.001, 0.2, 21)}
                model = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn', target=target)
            else:
                raise NotImplementedError('unknown method')

            protocol = UPP(data.test, repeats=100)
            modsel = GridSearchQ(model, param_grid, protocol, refit=False, n_jobs=-1, verbose=1)

            modsel.fit(data.training)
            print(f'best params {modsel.best_params_}')

            quantifier = modsel.best_model()

            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
            quantifier.fit(data.training)
            protocol = UPP(data.test, repeats=100)
            report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True)
            means = report.mean()
            csv.write(f'{method_identifier}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
            csv.flush()

    df = pd.read_csv(result_path, sep='\t')

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)
@ -0,0 +1,62 @@
from sklearn.linear_model import LogisticRegression
import os
import sys
import pandas as pd

import quapy as qp
from quapy.method.aggregative import DistributionMatching
from method_kdey import KDEy
from quapy.protocol import UPP


if __name__ == '__main__':

    qp.environ['SAMPLE_SIZE'] = 100
    qp.environ['N_JOBS'] = -1
    method = 'KDE'
    param = 0.1
    div = 'topsoe'
    method_identifier = f'{method}_{param}_{div}'

    # generates tuples (dataset, method, method_name)
    # (the dataset is needed for methods that process the dataset differently)
    def gen_methods():

        for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:

            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)

            if method == 'KDE':
                kdey = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn')
                yield data, kdey, method_identifier

            elif method == 'DM':
                dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=param)
                yield data, dm, method_identifier

            else:
                raise NotImplementedError('unknown method')

    os.makedirs('results', exist_ok=True)
    result_path = f'results/{method_identifier}.csv'

    if os.path.exists(result_path):
        print('Result already exists. Nothing to do')
        sys.exit(0)

    with open(result_path, 'wt') as csv:
        csv.write('Method\tDataset\tMAE\tMRAE\n')
        for data, quantifier, quant_name in gen_methods():
            quantifier.fit(data.training)
            protocol = UPP(data.test, repeats=100)
            report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True)
            means = report.mean()
            csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
            csv.flush()

    df = pd.read_csv(result_path, sep='\t')

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
    print(pv)
@ -1,8 +1,11 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import quapy as qp
@ -12,56 +15,96 @@ from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _traini
    DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional


# TODO: optimize the bandwidth automatically
# TODO: replace the l2 metric in the kernel with the EMD; try to visualize the difference between both criteria in a 3-simplex
# TODO: think of a MMD-y variant, i.e., an MMD variant that uses the points in the simplex and possibly any non-linear kernel


class SklearnKDE:
    def __init__(self):
        pass

    def fit(self):
        pass

    def likelihood(self):
        pass


class KDEy(AggregativeProbabilisticQuantifier):

    BANDWIDTH_METHOD = ['auto', 'scott', 'silverman']
-    ENGINE = ['scipy', 'sklearn']
+    ENGINE = ['scipy', 'sklearn', 'statsmodels']
+    TARGET = ['min_divergence', 'max_likelihood']

    def __init__(self, classifier: BaseEstimator, val_split=0.4, divergence: Union[str, Callable]='HD',
-                 bandwidth_method='scott', engine='sklearn', n_jobs=None):
+                 bandwidth='scott', engine='sklearn', target='min_divergence', n_jobs=None):
+        assert bandwidth in KDEy.BANDWIDTH_METHOD or isinstance(bandwidth, float), \
+            f'unknown bandwidth, valid ones are {KDEy.BANDWIDTH_METHOD} or any float'
+        assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'
+        assert target in KDEy.TARGET, f'unknown target, valid ones are {KDEy.TARGET}'
        self.classifier = classifier
        self.val_split = val_split
        self.divergence = divergence
-        self.bandwidth_method = bandwidth_method
+        self.bandwidth = bandwidth
        self.engine = engine
+        self.target = target
        self.n_jobs = n_jobs
-        assert bandwidth_method in KDEy.BANDWIDTH_METHOD, f'unknown bandwidth_method, valid ones are {KDEy.BANDWIDTH_METHOD}'
-        assert engine in KDEy.ENGINE, f'unknown engine, valid ones are {KDEy.ENGINE}'

+    def search_bandwidth_maxlikelihood(self, posteriors, labels):
+        grid = {'bandwidth': np.linspace(0.001, 0.2, 100)}
+        search = GridSearchCV(
+            KernelDensity(), param_grid=grid, n_jobs=-1, cv=50, verbose=1, refit=True
+        )
+        search.fit(posteriors, labels)
+        bandwidth = search.best_params_['bandwidth']
+        print(f'auto: bandwidth={bandwidth:.5f}')
+        return bandwidth

    def get_kde(self, posteriors):
+        # if self.bandwidth == 'auto':
+        #     print('adjusting bandwidth')
+        #
+        #     if self.engine == 'sklearn':
+        #         grid = {'bandwidth': np.linspace(0.001,0.2,41)}
+        #         search = GridSearchCV(
+        #             KernelDensity(), param_grid=grid, n_jobs=-1, cv=10, verbose=1, refit=True
+        #         )
+        #         search.fit(posteriors)
+        #         print(search.best_score_)
+        #         print(search.best_params_)
+        #
+        #         import pandas as pd
+        #         df = pd.DataFrame(search.cv_results_)
+        #         pd.set_option('display.max_columns', None)
+        #         pd.set_option('display.max_rows', None)
+        #         pd.set_option('expand_frame_repr', False)
+        #         print(df)
+        #         sys.exit(0)
+        #
+        #     kde = search

+        # else:
        if self.engine == 'scipy':
            # scipy treats columns as datapoints, and needs the datapoints not to lie in a lower-dimensional
            # subspace, which requires removing the last dimension, which is constrained
            posteriors = posteriors[:, :-1].T
            kde = scipy.stats.gaussian_kde(posteriors)
-            kde.set_bandwidth(self.bandwidth_method)
+            kde.set_bandwidth(self.bandwidth)
        elif self.engine == 'sklearn':
-            kde = KernelDensity(bandwidth=self.bandwidth_method).fit(posteriors)
+            kde = KernelDensity(bandwidth=self.bandwidth).fit(posteriors)
        return kde

    def pdf(self, kde, posteriors):
        if self.engine == 'scipy':
-            return kde(posteriors[:,:-1].T)
+            return kde(posteriors[:, :-1].T)
        elif self.engine == 'sklearn':
            return np.exp(kde.score_samples(posteriors))

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
        """
        Trains the classifier (if requested) and generates the validation distributions out of the training data.
        The validation distributions have shape `(n, ch, nbins)`, with `n` the number of classes, `ch` the number of
        channels (a channel is a description, in the form of a histogram, of a specific class -- there are as many
        channels as classes, although in the binary case one can use only one channel, since the other one is
        constrained), and `nbins` the number of bins. In particular, let `V` be the validation distributions; `di=V[i]`
        are the distributions obtained from training data labelled with class `i`; `dij = di[j]` is the discrete
        distribution of posterior probabilities `P(Y=j|X=x)` for training data labelled with class `i`, and `dij[k]`
        is the fraction of instances with a value in the `k`-th bin.

        :param data: the training set
        :param fit_classifier: set to False to bypass the training (the learner is assumed to be already fit)
@ -77,6 +120,9 @@ class KDEy(AggregativeProbabilisticQuantifier):
            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
        )

+        if self.bandwidth == 'auto':
+            self.bandwidth = self.search_bandwidth_maxlikelihood(posteriors, y)
+
        self.val_densities = [self.get_kde(posteriors[y == cat]) for cat in range(data.n_classes)]
        self.val_posteriors = posteriors
@ -90,14 +136,18 @@ class KDEy(AggregativeProbabilisticQuantifier):
        """
        return lambda posteriors: sum(prev_i * self.pdf(kde_i, posteriors) for kde_i, prev_i in zip(self.val_densities, prev))

+    def aggregate(self, posteriors: np.ndarray):
+        if self.target == 'min_divergence':
+            return self._target_divergence(posteriors)
+        elif self.target == 'max_likelihood':
+            return self._target_likelihood(posteriors)
+        else:
+            raise ValueError('unknown target')

    def _target_divergence(self, posteriors):
        """
        Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution
        (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.
        In the multiclass case, with `n` the number of classes, the test and mixture distributions contain
        `n` channels (proper distributions of binned posterior probabilities), on which the divergence is computed
        independently. The matching is computed as an average of the divergence across all channels.

        :param posteriors: posterior probabilities of the instances in the sample
        :return: a vector of class prevalence estimates
@ -107,12 +157,14 @@ class KDEy(AggregativeProbabilisticQuantifier):
        test_likelihood = self.pdf(test_density, posteriors)
        divergence = _get_divergence(self.divergence)

+        n_classes = len(self.val_densities)
+
        def match(prev):
            val_pdf = self.val_pdf(prev)
            val_likelihood = val_pdf(posteriors)

            # for i, prev_i in enumerate(prev):

            return divergence(val_likelihood, test_likelihood)

        # the initial point is set as the uniform distribution
@ -124,50 +176,27 @@ class KDEy(AggregativeProbabilisticQuantifier):
        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x

-if __name__ == '__main__':
-
-    qp.environ['SAMPLE_SIZE'] = 100
-    qp.environ['N_JOBS'] = -1
-    div = 'HD'
-
-    # generates tuples (dataset, method, method_name)
-    # (the dataset is needed for methods that process the dataset differently)
-    def gen_methods():
-
-        for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
-
-            data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True)
-
-            # kdey = KDEy(LogisticRegression(), divergence=div, bandwidth_method='scott')
-            # yield data, kdey, f'KDEy-{div}-scott'
-
-            kdey = KDEy(LogisticRegression(), divergence=div, bandwidth_method='silverman', engine='sklearn')
-            yield data, kdey, f'KDEy-{div}-silverman'
-
-            dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=5)
-            yield data, dm, f'DM-5b-{div}'
-
-            # dm = DistributionMatching(LogisticRegression(), divergence=div, nbins=10)
-            # yield data, dm, f'DM-10b-{div}'
-
-    result_path = 'results_kdey.csv'
-    with open(result_path, 'wt') as csv:
-        csv.write(f'Method\tDataset\tMAE\tMRAE\n')
-        for data, quantifier, quant_name in gen_methods():
-            quantifier.fit(data.training)
-            protocol = UPP(data.test, repeats=100)
-            report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae','mrae'], verbose=True)
-            means = report.mean()
-            csv.write(f'{quant_name}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
-            csv.flush()
-
-    df = pd.read_csv(result_path, sep='\t')
-    # print(df)
-
-    pd.set_option('display.max_columns', None)
-    pd.set_option('display.max_rows', None)
-    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE", "MRAE"])
-    print(pv)
+    def _target_likelihood(self, posteriors):
+        """
+        Searches for the mixture model parameter (the sought prevalence values) that yields the mixture of
+        class-conditional validation densities under which the test sample attains maximum likelihood.
+
+        :param posteriors: posterior probabilities of the instances in the sample
+        :return: a vector of class prevalence estimates
+        """
+        n_classes = len(self.val_densities)
+
+        def neg_loglikelihood(prev):
+            val_pdf = self.val_pdf(prev)
+            test_likelihood = val_pdf(posteriors)
+            test_loglikelihood = np.log(test_likelihood)
+            return -np.sum(test_loglikelihood)
+
+        # the initial point is set as the uniform distribution
+        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
+
+        # solutions are bounded to those contained in the unit-simplex
+        bounds = tuple((0, 1) for _ in range(n_classes))  # values in [0,1]
+        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
+        r = optimize.minimize(neg_loglikelihood, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
+        return r.x
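A note on the two engines handled by `get_kde`/`pdf` above: scipy's `gaussian_kde` expects data of shape `(d, n)` and fails when the points lie in a lower-dimensional subspace, as simplex-constrained posteriors do, hence the constrained last coordinate is dropped; sklearn's `KernelDensity` expects `(n, d)` and returns log-densities. A minimal standalone sketch of the two conventions (synthetic data, illustration only, not part of the commit):

import numpy as np
import scipy.stats
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
posteriors = rng.dirichlet([1, 1, 1], size=100)  # rows sum to 1: points on the 3-class simplex

# scipy convention: drop the constrained last column and transpose to (d, n);
# gaussian_kde on the full matrix would raise LinAlgError (singular covariance)
kde_scipy = scipy.stats.gaussian_kde(posteriors[:, :-1].T)
kde_scipy.set_bandwidth(0.1)
dens_scipy = kde_scipy(posteriors[:, :-1].T)  # densities evaluated at the same points

# sklearn convention: fit on the full (n, d) matrix; score_samples returns log-densities
kde_sklearn = KernelDensity(bandwidth=0.1).fit(posteriors)
dens_sklearn = np.exp(kde_sklearn.score_samples(posteriors))

# the two are not comparable in absolute value: they are densities over different supports
print(dens_scipy[:3], dens_sklearn[:3])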
@ -0,0 +1,32 @@
import sys
from pathlib import Path
import pandas as pd


result_dir = 'results'

dfs = []

pathlist = Path(result_dir).rglob('*.csv')
for path in pathlist:
    path_in_str = str(path)
    print(path_in_str)

    df = pd.read_csv(path_in_str, sep='\t')

    dfs.append(df)

df = pd.concat(dfs)

piv = df.pivot_table(index='Dataset', columns='Method', values='MRAE')
piv.loc['mean'] = piv.mean()

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('expand_frame_repr', False)
print(piv)
@ -0,0 +1,21 @@
Key idea:
KDE can be used to generate two distributions (one is a mixture model of KDEs fit on training data, conditioned on
each class, and the other is a KDE fit on test), between which a divergence is then computed (the objective to
minimize). The other option is to generate a single distribution (the training mixture model) and take the
likelihood of the test points as the objective to maximize.

1) clarify: only test?
2) implement the 'auto' bandwidth
    - internal optimization for likelihood [none seems to work well]
    - over everything (e.g., the whole training set)?
    - independently for each labelled set? (e.g., positives, negatives, neutrals, and test)
    - optimization as a GridSearchQ parameter
3) clarify: topsoe?
4) another kind of model selection?
5) increase the number of bags
6) optimize the C parameter? optimize the kernel? optimize the distance?
7) sklearn's KDE or statsmodels' multivariate KDE? also check what this is (it seems to give P(Y|X), so maybe the
classifier could be dropped?):
https://www.statsmodels.org/dev/_modules/statsmodels/nonparametric/kernel_density.html#KDEMultivariateConditional
8) drop the last dimension in sklearn too?
9) optimize for RAE instead of AE?
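To make the key idea above concrete, a self-contained sketch of the two objectives (not part of the commit; it uses synthetic 3-class posteriors, a fixed bandwidth, and a plain squared-L2 divergence as a stand-in for the HD/topsoe divergences used in the code):

import numpy as np
from scipy import optimize
from sklearn.neighbors import KernelDensity

rng = np.random.RandomState(0)
n_classes = 3
alphas = [np.eye(n_classes)[i] * 10 + 1 for i in range(n_classes)]  # each class concentrated on one vertex

# class-conditional "validation posteriors" and a test sample with known prevalence
val_post = [rng.dirichlet(alphas[i], size=200) for i in range(n_classes)]
true_prev = np.asarray([0.6, 0.3, 0.1])
test_post = np.vstack([rng.dirichlet(alphas[i], size=int(p * 500)) for i, p in enumerate(true_prev)])

kdes = [KernelDensity(bandwidth=0.1).fit(X) for X in val_post]  # one KDE per class (the mixture components)
test_kde = KernelDensity(bandwidth=0.1).fit(test_post)          # KDE of the test sample

def mixture_pdf(prev, points):
    # mixture of class-conditional KDEs, weighted by the candidate prevalence (as in KDEy.val_pdf)
    return sum(p * np.exp(k.score_samples(points)) for p, k in zip(prev, kdes))

def neg_loglikelihood(prev):  # 'max_likelihood' target: likelihood of the test points under the mixture
    return -np.sum(np.log(mixture_pdf(prev, test_post) + 1e-12))

def l2_divergence(prev):      # 'min_divergence' target: mismatch between mixture and test densities
    return np.mean((mixture_pdf(prev, test_post) - np.exp(test_kde.score_samples(test_post))) ** 2)

uniform = np.full(n_classes, 1 / n_classes)
bounds = tuple((0, 1) for _ in range(n_classes))
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # stay on the unit simplex

for loss in (neg_loglikelihood, l2_divergence):
    r = optimize.minimize(loss, x0=uniform, method='SLSQP', bounds=bounds, constraints=constraints)
    print(f'{loss.__name__}: estimated prevalence = {np.round(r.x, 3)} (true: {true_prev})')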
@ -88,7 +88,7 @@ class GridSearchQ(BaseQuantifier):
        hyper = [dict({k: val[i] for i, k in enumerate(params_keys)}) for val in itertools.product(*params_values)]
        self._sout(f'starting model selection with {self.n_jobs =}')
-        #pass a seed to parallel so it is set in clild processes
+        # pass a seed to parallel so it is set in child processes
        scores = qp.util.parallel(
            self._delayed_eval,
            ((params, training) for params in hyper),