forked from moreo/QuaPy
initial experiments and DIR method
parent 26de9d92eb
commit d995990fba
@@ -27,6 +27,40 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
.idea
.vscode
LeQua2022
MultiLabel/results_generales
MultiLabel/mlqtables
NewMethods/plots*
NewMethods/results*
NewMethods/tables*
NewMethods/latex*
Ordinal/data*
Ordinal/roberta*
Ordinal/tables*
Ordinal/results*
eDiscovery/plots*
eDiscovery/results*
examples/results*
poster-cikm*
slides-cikm*
slides-short-cikm*
quick_experiment/figures*
quick_experiment/figures*
svm_perf_quantification/*
TweetSentQuant/plots*
TweetSentQuant/results*
TweetSentQuant/tables*
TweetSentQuant/Tweet Sentiment Quantification_NPP
TweetSentQuant/checkpoint
TweetSentQuant/*.tex
checkpoint
*.png
*.zip
*.pkl
*.pickle
*.pdf

# PyInstaller
# Usually these files are written by a python script from a template
@@ -1,3 +1,4 @@
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
import os

@@ -5,52 +6,91 @@ import sys
import pandas as pd

import quapy as qp
from quapy.method.aggregative import DistributionMatching
from quapy.method.aggregative import EMQ, DistributionMatching, PACC, HDy, OneVsAllAggregative
from method_kdey import KDEy
from method_dirichlety import DIRy
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP


if __name__ == '__main__':

qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B']
qp.environ['N_JOBS'] = -1
method = 'KDE'
param = 0.1
div = 'topsoe'
method_identifier = f'{method}_modsel_{div}'
result_dir = f'results_lequa'
optim = 'mae'

os.makedirs('results', exist_ok=True)
result_path = f'results_LequaT2B/{method_identifier}.csv'
os.makedirs(result_dir, exist_ok=True)

#if os.path.exists(result_path):
# print('Result already exists. Nothing to do')
# sys.exit(0)
hyper_LR = {
'classifier__C': np.logspace(-3,3,7),
'classifier__class_weight': ['balanced', None]
}

with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for method in ['PACC', 'SLD', 'DM', 'KDE', 'HDy', 'DIR']:

#if os.path.exists(result_path):
# print('Result already exists. Nothing to do')
# sys.exit(0)

dataset = 'T1B'
train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
result_path = f'{result_dir}/{method}'
if os.path.exists(result_path+'.dataframe'):
print(f'result file {result_path} already exists; skipping')
continue

if method == 'KDE':
param_grid = {'bandwidth': np.linspace(0.001, 0.1, 11)}
model = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn')
else:
raise NotImplementedError('unknown method')
with open(result_path+'.csv', 'at') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')

modsel = GridSearchQ(model, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1)
dataset = 'T1B'
train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset)
print('init', dataset)
if method == 'KDE':
param_grid = {
'bandwidth': np.linspace(0.001, 0.2, 21),
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}
quantifier = KDEy(LogisticRegression(), target='max_likelihood')
elif method == 'DIR':
param_grid = hyper_LR
quantifier = DIRy(LogisticRegression())
elif method == 'SLD':
param_grid = hyper_LR
quantifier = EMQ(LogisticRegression())
elif method == 'PACC':
param_grid = hyper_LR
quantifier = PACC(LogisticRegression())
elif method == 'HDy-OvA':
param_grid = {
'binary_quantifier__classifier__C': np.logspace(-4,4,9),
'binary_quantifier__classifier__class_weight': ['balanced', None]
}
quantifier = OneVsAllAggregative(HDy(LogisticRegression()))
elif method == 'DM':
param_grid = {
'nbins': [5,10,15],
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}
quantifier = DistributionMatching(LogisticRegression())
else:
raise NotImplementedError('unknown method', method)

modsel.fit(train)
print(f'best params {modsel.best_params_}')
modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim)

quantifier = modsel.best_model()
modsel.fit(train)
print(f'best params {modsel.best_params_}')
pickle.dump(modsel.best_params_, open(f'{result_dir}/{method}_{dataset}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

report = qp.evaluation.evaluation_report(quantifier, protocol=test_gen, error_metrics=['mae', 'mrae'], verbose=True)
means = report.mean()
csv.write(f'{method}\tLeQua-{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
csv.flush()
quantifier = modsel.best_model()

df = pd.read_csv(result_path, sep='\t')
report = qp.evaluation.evaluation_report(quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'], verbose=True)
means = report.mean()
report.to_csv(result_path+'.dataframe')
csv.write(f'{method}\tLeQua-T1B\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()

df = pd.read_csv(result_path+'.csv', sep='\t')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
@@ -1,3 +1,4 @@
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
import os

@@ -5,8 +6,9 @@ import sys
import pandas as pd

import quapy as qp
from quapy.method.aggregative import DistributionMatching
from quapy.method.aggregative import EMQ, DistributionMatching, PACC, HDy, OneVsAllAggregative
from method_kdey import KDEy
from method_dirichlety import DIRy
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP

@@ -15,50 +17,103 @@ if __name__ == '__main__':

qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
method = 'KDE'
param = 0.1
target = 'max_likelihood'
div = 'topsoe'
method_identifier = f'{method}_modsel_{div if target=="min_divergence" else target}'
n_bags_val = 250
n_bags_test = 1000
result_dir = f'results_tweet_{n_bags_test}'
optim = 'mae'

os.makedirs('results', exist_ok=True)
result_path = f'results/{method_identifier}.csv'
os.makedirs(result_dir, exist_ok=True)

#if os.path.exists(result_path):
# print('Result already exists. Nothing to do')
# sys.exit(0)
hyper_LR = {
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}

with open(result_path, 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\n')
for method in ['PACC', 'SLD', 'DM', 'KDE', 'HDy', 'DIR']:

#if os.path.exists(result_path):
# print('Result already exists. Nothing to do')
# sys.exit(0)

for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
print('init', dataset)
result_path = f'{result_dir}/{method}'
if os.path.exists(result_path+'.dataframe'):
print(f'result file {result_path} already exists; skipping')
continue

data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=True)
with open(result_path+'.csv', 'at') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n')

if method == 'KDE':
param_grid = {'bandwidth': np.linspace(0.001, 0.2, 21)}
model = KDEy(LogisticRegression(), divergence=div, bandwidth=param, engine='sklearn', target=target)
else:
raise NotImplementedError('unknown method')
# the four semeval datasets share the same training set, so optimizing hyperparameters four times is useless;
# this flag records whether model selection has already been carried out for them, so it can be skipped
semeval_trained = False

protocol = UPP(data.test, repeats=100)
modsel = GridSearchQ(model, param_grid, protocol, refit=False, n_jobs=-1, verbose=1)
for dataset in qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST:
print('init', dataset)

modsel.fit(data.training)
print(f'best params {modsel.best_params_}')
is_semeval = dataset.startswith('semeval')

quantifier = modsel.best_model()
if not is_semeval or not semeval_trained:

data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
quantifier.fit(data.training)
protocol = UPP(data.test, repeats=100)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae'], verbose=True)
means = report.mean()
csv.write(f'{method_identifier}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\n')
csv.flush()
if method == 'KDE':
param_grid = {
'bandwidth': np.linspace(0.001, 0.2, 21),
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}
quantifier = KDEy(LogisticRegression(), target='max_likelihood')
elif method == 'DIR':
param_grid = hyper_LR
quantifier = DIRy(LogisticRegression())
elif method == 'SLD':
param_grid = hyper_LR
quantifier = EMQ(LogisticRegression())
elif method == 'PACC':
param_grid = hyper_LR
quantifier = PACC(LogisticRegression())
elif method == 'HDy-OvA':
param_grid = {
'binary_quantifier__classifier__C': np.logspace(-4,4,9),
'binary_quantifier__classifier__class_weight': ['balanced', None]
}
quantifier = OneVsAllAggregative(HDy(LogisticRegression()))
elif method == 'DM':
param_grid = {
'nbins': [5,10,15],
'classifier__C': np.logspace(-4,4,9),
'classifier__class_weight': ['balanced', None]
}
quantifier = DistributionMatching(LogisticRegression())
else:
raise NotImplementedError('unknown method', method)

df = pd.read_csv(result_path, sep='\t')
# model selection
data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=True)

protocol = UPP(data.test, repeats=n_bags_val)
modsel = GridSearchQ(quantifier, param_grid, protocol, refit=False, n_jobs=-1, verbose=1, error=optim)

modsel.fit(data.training)
print(f'best params {modsel.best_params_}')
pickle.dump(modsel.best_params_, open(f'{result_dir}/{method}_{dataset}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL)

quantifier = modsel.best_model()

if is_semeval:
semeval_trained = True

else:
print(f'model selection for {dataset} already done; skipping')

data = qp.datasets.fetch_twitter(dataset, min_df=3, pickle=True, for_model_selection=False)
quantifier.fit(data.training)
protocol = UPP(data.test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True)
report.to_csv(result_path+'.dataframe')
means = report.mean()
csv.write(f'{method}\t{data.name}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n')
csv.flush()

df = pd.read_csv(result_path+'.csv', sep='\t')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
@@ -0,0 +1,101 @@
import os
import sys
from typing import Union, Callable
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import quapy as qp
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
    DistributionMatching, _get_divergence
import scipy
from scipy import optimize
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
import dirichlet


class DIRy(AggregativeProbabilisticQuantifier):

    def __init__(self, classifier: BaseEstimator, val_split=0.4, n_jobs=None, target='max_likelihood'):
        self.classifier = classifier
        self.val_split = val_split
        self.n_jobs = n_jobs
        self.target = target

    def fit(self, data: LabelledCollection, fit_classifier=True, val_split: Union[float, LabelledCollection] = None):
        if val_split is None:
            val_split = self.val_split

        self.classifier, y, posteriors, _, _ = cross_generate_predictions(
            data, self.classifier, val_split, probabilistic=True, fit_classifier=fit_classifier, n_jobs=self.n_jobs
        )

        # one Dirichlet (MLE parameters) per class, fitted on the validation posteriors of that class
        self.val_parameters = [dirichlet.mle(posteriors[y == cat]) for cat in range(data.n_classes)]

        return self

    def val_pdf(self, prev):
        """
        Returns a function that computes the mixture model with the given prev as the mixture factor
        :param prev: a prevalence vector, ndarray
        :return: a function implementing the validation distribution with fixed mixture factor
        """
        return lambda posteriors: sum(prev_i * dirichlet.pdf(parameters_i)(posteriors) for parameters_i, prev_i in zip(self.val_parameters, prev))

    def aggregate(self, posteriors: np.ndarray):
        if self.target == 'min_divergence':
            raise NotImplementedError('not yet')
            return self._target_divergence(posteriors)
        elif self.target == 'max_likelihood':
            return self._target_likelihood(posteriors)
        else:
            raise ValueError('unknown target')

    def _target_divergence(self, posteriors):
        # note: this branch is currently disabled (aggregate raises NotImplementedError before reaching it)
        # and still refers to KDE-specific attributes (get_kde, pdf, divergence, val_densities) inherited from KDEy
        test_density = self.get_kde(posteriors)
        # val_test_posteriors = np.concatenate([self.val_posteriors, posteriors])
        test_likelihood = self.pdf(test_density, posteriors)
        divergence = _get_divergence(self.divergence)

        n_classes = len(self.val_densities)

        def match(prev):
            val_pdf = self.val_pdf(prev)
            val_likelihood = val_pdf(posteriors)
            #for i,prev_i in enumerate(prev):
            return divergence(val_likelihood, test_likelihood)

        # the initial point is set as the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))

        # solutions are bounded to those contained in the unit-simplex
        bounds = tuple((0, 1) for _ in range(n_classes))  # values in [0,1]
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
        r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x

    def _target_likelihood(self, posteriors, eps=0.000001):
        n_classes = len(self.val_parameters)

        def neg_loglikelihood(prev):
            val_pdf = self.val_pdf(prev)
            test_likelihood = val_pdf(posteriors)
            test_loglikelihood = np.log(test_likelihood + eps)
            return -np.sum(test_loglikelihood)

        # the initial point is set as the uniform distribution
        uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))

        # solutions are bounded to those contained in the unit-simplex
        bounds = tuple((0, 1) for _ in range(n_classes))  # values in [0,1]
        constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x)})  # values summing up to 1
        r = optimize.minimize(neg_loglikelihood, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
        return r.x
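For reference, a minimal usage sketch of the DIRy quantifier defined above. It assumes the standard QuaPy aggregative API, in which quantify() first computes posterior probabilities with the wrapped classifier and then delegates to the aggregate() method shown above; fetch_lequa2022 and the constructor arguments are used exactly as in the experiment scripts in this commit.

import quapy as qp
from sklearn.linear_model import LogisticRegression
from method_dirichlety import DIRy

train, val_gen, test_gen = qp.datasets.fetch_lequa2022('T1B')
quantifier = DIRy(LogisticRegression(), val_split=0.4, target='max_likelihood')
quantifier.fit(train)  # fits the classifier and one Dirichlet MLE per class on the validation posteriors
prevalence = quantifier.quantify(train.instances)  # prevalence estimate for a bag of (unlabelled) instances
print(prevalence)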
@@ -9,8 +9,8 @@ from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import quapy as qp
from data import LabelledCollection
from protocol import APP, UPP
from quapy.data import LabelledCollection
from quapy.protocol import APP, UPP
from quapy.method.aggregative import AggregativeProbabilisticQuantifier, _training_helper, cross_generate_predictions, \
    DistributionMatching, _get_divergence
import scipy

@@ -22,16 +22,6 @@ from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional
# TODO: think of a MMD-y variant, i.e., a MMD variant that uses the points in the simplex and possibly any non-linear kernel


class SklearnKDE:
def __init__(self):
pass

def fit(self):
pass

def likelihood(self):
pass


class KDEy(AggregativeProbabilisticQuantifier):

@@ -163,8 +153,6 @@ class KDEy(AggregativeProbabilisticQuantifier):
val_pdf = self.val_pdf(prev)
val_likelihood = val_pdf(posteriors)

#for i,prev_i in enumerate(prev):

return divergence(val_likelihood, test_likelihood)

# the initial point is set as the uniform distribution

@@ -176,7 +164,7 @@ class KDEy(AggregativeProbabilisticQuantifier):
r = optimize.minimize(match, x0=uniform_distribution, method='SLSQP', bounds=bounds, constraints=constraints)
return r.x

def _target_likelihood(self, posteriors):
def _target_likelihood(self, posteriors, eps=0.000001):
"""
Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution
(the mixture) that best matches the test distribution, in terms of the divergence measure of choice.

@@ -189,8 +177,9 @@ class KDEy(AggregativeProbabilisticQuantifier):
def neg_loglikelihood(prev):
val_pdf = self.val_pdf(prev)
test_likelihood = val_pdf(posteriors)
test_loglikelihood = np.log(test_likelihood)
return - np.sum(test_loglikelihood)
test_loglikelihood = np.log(test_likelihood + eps)
return -np.sum(test_loglikelihood)
#return -np.prod(test_likelihood)

# the initial point is set as the uniform distribution
uniform_distribution = np.full(fill_value=1 / n_classes, shape=(n_classes,))
@@ -2,7 +2,8 @@ import sys
from pathlib import Path
import pandas as pd

result_dir = 'results'
#result_dir = 'results_tweet_1000'
result_dir = 'results_lequa'

dfs = []

@@ -11,19 +12,27 @@ for path in pathlist:
path_in_str = str(path)
print(path_in_str)

df = pd.read_csv(path_in_str, sep='\t')

dfs.append(df)
try:
df = pd.read_csv(path_in_str, sep='\t')
if not df.empty:
dfs.append(df)
except Exception:
print('empty')

df = pd.concat(dfs)

piv = df.pivot_table(index='Dataset', columns='Method', values='MRAE')
piv.loc['mean'] = piv.mean()
for err in ['MAE', 'MRAE']:
print('-'*100)
print(err)
print('-'*100)
piv = df.pivot_table(index='Dataset', columns='Method', values=err)
piv.loc['mean'] = piv.mean()

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('expand_frame_repr', False)
print(piv)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('expand_frame_repr', False)
print(piv)
print()
@@ -4,6 +4,11 @@ and the other is a KDE on the test data), from which the divergence is then computed (obj
generate only one distribution (the mixture model of the training data) and take the likelihood of the test points
as the objective to maximize.

- keep the best hyperparameters so they can be inspected
- dump the result dataframes so that statistical tests can be run
- make plots


1) clarify: only test?
2) implement the automatic option
- internal optimization for the likelihood [none of them seems to work well]
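In equation form, the maximum-likelihood target mentioned in the first note above, as implemented by _target_likelihood in both KDEy and the new DIRy, can be sketched as follows, where alpha is the sought prevalence vector constrained to the simplex, p_i is the class-conditional density fitted on the validation posteriors of class i (a KDE or a Dirichlet), and epsilon is the smoothing constant introduced in this commit:

\hat{\alpha} \;=\; \arg\max_{\alpha \in \Delta^{n-1}} \; \sum_{x \in \mathrm{test}} \log\Big( \sum_{i=1}^{n} \alpha_i \, p_i(x) + \epsilon \Big)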