cleaning last experiments for report
This commit is contained in:
parent
9aecdad66f
commit
10595246a9
|
@ -74,12 +74,11 @@ def plot(xaxis, metrics_measurements, metrics_names, suffix):
|
|||
plt.close()
|
||||
|
||||
|
||||
def plot_stack(xaxis, metrics_measurements, metrics_names, suffix):
|
||||
def plot_stack(xaxis, metrics_measurements, metrics_names, figname):
|
||||
|
||||
# Crear la figura y los ejes (4 bloques verticales)
|
||||
fig, axs = plt.subplots(4, 1, figsize=(8, 12))
|
||||
n_measures = len(metrics_measurements)//2
|
||||
|
||||
x = xaxis
|
||||
fig, axs = plt.subplots(n_measures, 1, figsize=(8, 3*n_measures))
|
||||
|
||||
indexes = np.arange(len(metrics_measurements))
|
||||
axs_idx = 0
|
||||
|
@ -105,6 +104,9 @@ def plot_stack(xaxis, metrics_measurements, metrics_names, suffix):
|
|||
|
||||
# axs[axs_idx].set_title(f'{metric_te_name} and {metric_tr_name}')
|
||||
axs[axs_idx].legend(loc='lower right')
|
||||
axs[axs_idx].set_xscale('log')
|
||||
if axs_idx==0:
|
||||
axs[axs_idx].set_title(dataset)
|
||||
if axs_idx < len(indexes)//2 -1:
|
||||
axs[axs_idx].set_xticks([])
|
||||
|
||||
|
@ -120,7 +122,7 @@ def plot_stack(xaxis, metrics_measurements, metrics_names, suffix):
|
|||
# plt.show()
|
||||
os.makedirs('./plots/likelihood/', exist_ok=True)
|
||||
|
||||
plt.savefig(f'./plots/likelihood/{dataset}-fig{suffix}.png')
|
||||
plt.savefig(f'./plots/likelihood/{figname}.png')
|
||||
plt.close()
|
||||
|
||||
|
||||
|
@ -199,7 +201,7 @@ qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE
|
|||
show_ae = True
|
||||
show_rae = True
|
||||
show_mse = False
|
||||
show_kld = True
|
||||
show_kld = False
|
||||
normalize = True
|
||||
|
||||
epsilon = 1e-10
|
||||
|
@ -259,7 +261,7 @@ for i, dataset in enumerate(tqdm(DATASETS, desc='processing datasets', total=len
|
|||
# measurement_names.append('NLL(te)')
|
||||
# measurement_names.append('NLL(tr)')
|
||||
# plot(xaxis, measurements, measurement_names, suffix='AVEtr')
|
||||
plot_stack(xaxis, measurements, measurement_names, suffix='AVEtr')
|
||||
plot_stack(xaxis, measurements, measurement_names, figname=f'{i}.png')
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
import pickle
|
||||
import os
|
||||
from time import time
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2
|
||||
from quapy.method.aggregative import PACC, EMQ, KDEyML
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP
|
||||
from pathlib import Path
|
||||
|
||||
from result_table.src.table import Table
|
||||
|
||||
# NOTE(review): presumably the random seed for the experiments; it is not
# referenced in the visible part of this script -- confirm before relying on it.
SEED = 1
|
||||
|
||||
|
||||
def newLR():
    """Return a new, unfitted ``LogisticRegression`` with ``max_iter=3000``.

    A separate instance is created on every call, so each quantifier built
    from it owns its own classifier object.
    """
    classifier = LogisticRegression(max_iter=3000)
    return classifier
|
||||
|
||||
|
||||
# Hyperparameter grid typically explored for Logistic Regression: nine
# log-spaced values of C between 1e-4 and 1e4, with and without class balancing.
logreg_grid = dict(
    C=np.logspace(-4, 4, 9),
    class_weight=[None, 'balanced'],
)
|
||||
|
||||
|
||||
def wrap_hyper(classifier_hyper_grid: dict):
    """Return a copy of the grid with every key prefixed by ``classifier__``.

    Values are carried over untouched and the key order of the input is kept.
    NOTE(review): the prefix presumably routes each hyperparameter to the
    quantifier's inner classifier during model selection -- confirm against
    the grid-search code.
    """
    prefixed = {}
    for name, values in classifier_hyper_grid.items():
        prefixed['classifier__' + name] = values
    return prefixed
|
||||
|
||||
|
||||
# (name, quantifier, hyperparameter grid) triples of the methods to collect.
# Entries currently disabled, kept for reference:
#   ('PACC', PACC(newLR()), wrap_hyper(logreg_grid))
#   ('EMQ', EMQ(newLR()), wrap_hyper(logreg_grid))
#   ('KDEy-MLred', KDEyMLred(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}})
METHODS = [
    # original KDEy: the bandwidth is explored together with the classifier grid
    ('KDEy', KDEyML(newLR()), dict(wrap_hyper(logreg_grid), bandwidth=np.logspace(-4, np.log10(0.2), 20))),
    # bandwidth given by name ('scott' / 'silverman') instead of a numeric grid
    ('KDEy-scott', KDEyML(newLR(), bandwidth='scott'), wrap_hyper(logreg_grid)),
    ('KDEy-silver', KDEyML(newLR(), bandwidth='silverman'), wrap_hyper(logreg_grid)),
] + [
    # automatic bandwidth: one entry per (target, search strategy) pair;
    # the '+' suffix marks the 'optim' search, the plain name the 'grid' search
    (name, KDEyMLauto2(newLR(), bandwidth='auto', target=target, search=search), wrap_hyper(logreg_grid))
    for name, target, search in (
        ('KDEy-NLL', 'likelihood', 'grid'),
        ('KDEy-NLL+', 'likelihood', 'optim'),
        ('KDEy-AE', 'mae', 'grid'),
        ('KDEy-AE+', 'mae', 'optim'),
        ('KDEy-RAE', 'mrae', 'grid'),
        ('KDEy-RAE+', 'mrae', 'optim'),
    )
]
|
||||
|
||||
|
||||
"""
|
||||
TKDEyML optimized the bandwidth first (init 0.05) and then the prevalence (init uniform)
TKDEyML2 optimized the prevalence first (init uniform) and then the bandwidth (init 0.05)
TKDEyML3 optimized the prevalence first (init uniform) and then the bandwidth (init 0.1)
TKDEyML4 is like ML2 but with at most 5 iterations per optimization
|
||||
"""
|
||||
# Transductive variants, in the same (name, quantifier, grid) format as METHODS.
# All of them are currently disabled, so the list is empty; the candidates are
# kept here for reference:
#   ('TKDEy-ML', KDEyMLauto(newLR()), None)
#   ('TKDEy-both', KDEyMLauto(newLR(), optim='both'), None)
#   ('TKDEy-bothfine', KDEyMLauto(newLR(), optim='both_fine'), None)
#   ('TKDEy-two', KDEyMLauto(newLR(), optim='two_steps'), None)
#   ('TKDEy-MLike', KDEyMLauto(newLR(), optim='max_likelihood'), None)
#   ('TKDEy-MLike2', KDEyMLauto(newLR(), optim='max_likelihood2'), None)
#   ('TKDEy-ML3', KDEyMLauto(newLR()), None)
#   ('TKDEy-ML4', KDEyMLauto(newLR()), None)
TRANSDUCTIVE_METHODS = []
|
||||
|
||||
def show_results(result_path, tables, tables_path='./tables/main.pdf'):
    """Print one summary pivot per metric and render the tables to a PDF.

    Args:
        result_path: path of the tab-separated results file *without* its
            ``.csv`` extension (the extension is appended here).
        tables: mapping whose values are the ``Table`` objects to render.
        tables_path: output path handed to ``Table.LatexPDF``; its parent
            directory is created if missing.
    """
    import pandas as pd

    results = pd.read_csv(result_path + '.csv', sep='\t')

    # lift pandas' display limits so the pivots are printed in full
    display_options = (
        ('display.max_columns', None),
        ('display.max_rows', None),
        ('display.width', 1000),  # set the maximum display width
    )
    for option, value in display_options:
        pd.set_option(option, value)

    # one Dataset x Method pivot per reported column, with marginal means
    for column in ('MAE', 'MRAE', 'KLD', 'TR-TIME', 'TE-TIME'):
        pivot = results.pivot_table(index='Dataset', columns="Method", values=[column], margins=True)
        print(pivot)

    os.makedirs(Path(tables_path).parent, exist_ok=True)

    # display names used for the methods in the rendered tables
    method_replace = {
        'KDEy': 'KDEy(orig)',
        'KDEy-scott': 'Scott',
        'KDEy-silver': 'Silver',
        'KDEy-NLL': 'NLL(grid)',
        'KDEy-NLL+': 'NLL(search)',
        'KDEy-AE': 'AE(grid)',
        'KDEy-AE+': 'AE(search)',
        'KDEy-RAE': 'RAE(grid)',
        'KDEy-RAE+': 'RAE(search)',
    }

    table_list = list(tables.values())
    Table.LatexPDF(tables_path, table_list, method_replace=method_replace, verbose=True, clean=False)
|
||||
|
||||
|
||||
def collect_results(method_name, tables, result_path=None):
    """Gather the stored per-dataset reports of one method.

    For every dataset in ``qp.datasets.UCI_MULTICLASS_DATASETS`` the report
    ``<method_name>_<dataset>.dataframe`` is looked up next to the global
    results file. Each report found is (a) added to the selected tables and
    (b) summarised as one tab-separated line appended to ``<result_path>.csv``.
    Datasets without a stored report are announced and skipped.

    Args:
        method_name: name of the method; also used to build the file names.
        tables: mapping from metric name (a column of the report) to the table
            object that collects that metric.
        result_path: results file path without the ``.csv`` extension. When
            omitted, the module-level ``global_result_path`` set by the script
            entry point is used, which keeps existing two-argument calls
            working.
    """
    if result_path is None:
        # fall back to the path defined by the __main__ section
        result_path = global_result_path

    print('Init method', method_name)

    time_metrics = ('tr_time', 'te_time')
    results_dir = Path(result_path).parent

    with open(f'{result_path}.csv', 'at') as csv_file:
        for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
            print('init', dataset)

            local_result_path = os.path.join(results_dir, method_name + '_' + dataset + '.dataframe')
            if not os.path.exists(local_result_path):
                print(f'result file {local_result_path} not found; skipping')
                continue

            print(f'loading results from {local_result_path}')
            report = qp.util.load_report(local_result_path)

            for metric, table in tables.items():
                if metric in time_metrics:
                    add_column = True
                elif metric == 'mrae':
                    # methods whose name contains '-AE' are left out of the MRAE table
                    add_column = '-AE' not in method_name
                elif metric == 'mae':
                    # methods whose name contains '-RAE' are left out of the MAE table
                    add_column = '-RAE' not in method_name
                else:
                    add_column = False

                if add_column:
                    table.add(benchmark=dataset, method=method_name, v=report[metric])

            # NOTE(review): report is presumably a pandas DataFrame -- confirm
            means = report.mean(numeric_only=True)
            csv_file.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\t{means["tr_time"]:.3f}\t{means["te_time"]:.3f}\n')
            csv_file.flush()
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: rebuild the global results CSV from the stored
    # per-method reports, then print the summaries and render the tables.

    qp.environ['SAMPLE_SIZE'] = 500
    qp.environ['N_JOBS'] = -1
    n_bags_val = 100
    n_bags_test = 500

    result_dir = 'results_quantification/ucimulti'
    os.makedirs(result_dir, exist_ok=True)

    # one table per reported metric; the 'te_time' table is currently disabled
    tables = dict(
        mae=Table('inductive-mae'),
        mrae=Table('inductive-mrae'),
        tr_time=Table('inductive-tr-time'),
    )
    tables['tr_time'].format.show_std = False

    # collect_results reads this module-level name, so it must keep this name
    global_result_path = f'{result_dir}/allmethods'

    # start the CSV afresh with the header row; data rows are appended later
    with open(global_result_path + '.csv', 'wt') as csv:
        csv.write('\t'.join(['Method', 'Dataset', 'MAE', 'MRAE', 'KLD', 'TR-TIME', 'TE-TIME']) + '\n')

    for method_name, _, _ in METHODS + TRANSDUCTIVE_METHODS:
        collect_results(method_name, tables)

    show_results(global_result_path, tables)
|
|
@ -40,7 +40,7 @@ class KDEyMLauto(KDEyML):
|
|||
current_bandwidth = 0.05
|
||||
if self.optim == 'both_fine':
|
||||
current_bandwidth = np.full(fill_value=current_bandwidth, shape=(n_classes,))
|
||||
current_prevalence = np.full(fill_value=1 / n_classes, shape=(n_classes,))
|
||||
current_prevalence = F.uniform_prevalence(n_classes=n_classes)
|
||||
|
||||
if self.optim == 'max_likelihood':
|
||||
current_prevalence, current_bandwidth = self.optim_minimize_like(tr_posteriors, tr_y, te_posteriors, classes, grid=True)
|
||||
|
@ -107,9 +107,9 @@ class KDEyMLauto(KDEyML):
|
|||
|
||||
# bounds = [(0.00001, 0.2)]
|
||||
# r = optimize.minimize(neg_loglikelihood_bandwidth, x0=[current_bandwidth], method='SLSQP', bounds=bounds)
|
||||
r = optimize.minimize_scalar(neg_loglikelihood_bandwidth, bounds=(0.00001, 0.2))
|
||||
r = optimize.minimize_scalar(neg_loglikelihood_bandwidth, bounds=(0.0001, 0.2), options={'xatol': 0.005})
|
||||
# print(f'iterations-bandwidth={r.nit}')
|
||||
assert r.success, f'Process did not converge! {r.message}'
|
||||
# assert r.success, f'Process did not converge! {r.message}'
|
||||
return r.x
|
||||
|
||||
def optim_minimize_both(self, current_bandwidth, current_prev, tr_posteriors, tr_y, te_posteriors, classes):
|
||||
|
@ -128,7 +128,7 @@ class KDEyMLauto(KDEyML):
|
|||
prevalence_bandwidth = np.append(current_prev, current_bandwidth)
|
||||
r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints)
|
||||
print(f'iterations-both={r.nit}')
|
||||
assert r.success, 'Process did not converge!'
|
||||
# assert r.success, 'Process did not converge!'
|
||||
prev_band = r.x
|
||||
current_prevalence = prev_band[:-1]
|
||||
current_bandwidth = prev_band[-1]
|
||||
|
@ -145,12 +145,12 @@ class KDEyMLauto(KDEyML):
|
|||
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
||||
return -np.sum(test_loglikelihood)
|
||||
|
||||
bounds = [(0, 1) for _ in range(n_classes)] + [(0.00001, 1) for _ in range(n_classes)]
|
||||
bounds = [(0, 1) for _ in range(n_classes)] + [(0.0001, 0.2) for _ in range(n_classes)]
|
||||
constraints = ({'type': 'eq', 'fun': lambda x: 1 - sum(x[:n_classes])})
|
||||
prevalence_bandwidth = np.concatenate((current_prev, current_bandwidth))
|
||||
r = optimize.minimize(neg_loglikelihood_bandwidth, x0=prevalence_bandwidth, method='SLSQP', bounds=bounds, constraints=constraints)
|
||||
print(f'iterations-both-fine={r.nit}')
|
||||
assert r.success, 'Process did not converge!'
|
||||
# assert r.success, 'Process did not converge!'
|
||||
prev_band = r.x
|
||||
current_prevalence = prev_band[:n_classes]
|
||||
current_bandwidth = prev_band[n_classes:]
|
||||
|
@ -198,7 +198,7 @@ class KDEyMLauto(KDEyML):
|
|||
best_like = None
|
||||
best_prev = None
|
||||
init_prev = np.full(fill_value=1 / n_classes, shape=(n_classes,))
|
||||
for bandwidth in np.logspace(-4, 0.5, 50):
|
||||
for bandwidth in np.logspace(-4, np.log10(0.2), 50):
|
||||
mix_densities = self.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth)
|
||||
test_densities = [self.pdf(kde_i, te_posteriors) for kde_i in mix_densities]
|
||||
|
||||
|
@ -239,7 +239,7 @@ class KDEyMLauto(KDEyML):
|
|||
r = optimize.minimize(neglikelihood_band, x0=[0.001], method='SLSQP', bounds=bounds)
|
||||
|
||||
best_band = r.x[0]
|
||||
assert r.success, 'Process did not converge!'
|
||||
# assert r.success, 'Process did not converge!'
|
||||
print(f'solved in nit={r.nit}')
|
||||
return best_band
|
||||
|
||||
|
@ -333,11 +333,10 @@ class KDEyMLauto2(KDEyML):
|
|||
return loss_accum
|
||||
|
||||
if self.search == 'optim':
|
||||
r = optimize.minimize_scalar(eval_bandwidth, bounds=(0.001, 0.2), options={'xatol': 0.005})
|
||||
r = optimize.minimize_scalar(eval_bandwidth, bounds=(0.0001, 0.2), options={'xatol': 0.005})
|
||||
best_band = r.x
|
||||
best_loss_value = r.fun
|
||||
nit = r.nit
|
||||
# assert r.success, 'Process did not converge!'
|
||||
|
||||
elif self.search=='grid':
|
||||
nit=20
|
||||
|
@ -348,20 +347,20 @@ class KDEyMLauto2(KDEyML):
|
|||
self.bandwidth_ = best_band
|
||||
|
||||
|
||||
class KDEyMLred(KDEyML):
|
||||
def __init__(self, classifier: BaseEstimator=None, val_split=5, bandwidth=0.1, random_state=None, reduction=100, max_reduced=500):
|
||||
self.classifier = qp._get_classifier(classifier)
|
||||
self.val_split = val_split
|
||||
self.bandwidth = KDEBase._check_bandwidth(bandwidth)
|
||||
self.reduction = reduction
|
||||
self.max_reduced = max_reduced
|
||||
self.random_state = random_state
|
||||
|
||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
||||
n_classes = classif_predictions.n_classes
|
||||
tr_length = min(self.reduction * n_classes, self.max_reduced)
|
||||
if len(classif_predictions) > tr_length:
|
||||
classif_predictions = classif_predictions.sampling(tr_length)
|
||||
self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.classes_, self.bandwidth)
|
||||
return self
|
||||
# class KDEyMLred(KDEyML):
|
||||
# def __init__(self, classifier: BaseEstimator=None, val_split=5, bandwidth=0.1, random_state=None, reduction=100, max_reduced=500):
|
||||
# self.classifier = qp._get_classifier(classifier)
|
||||
# self.val_split = val_split
|
||||
# self.bandwidth = KDEBase._check_bandwidth(bandwidth)
|
||||
# self.reduction = reduction
|
||||
# self.max_reduced = max_reduced
|
||||
# self.random_state = random_state
|
||||
#
|
||||
# def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
||||
# n_classes = classif_predictions.n_classes
|
||||
# tr_length = min(self.reduction * n_classes, self.max_reduced)
|
||||
# if len(classif_predictions) > tr_length:
|
||||
# classif_predictions = classif_predictions.sampling(tr_length)
|
||||
# self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.classes_, self.bandwidth)
|
||||
# return self
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import numpy as np
|
|||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred
|
||||
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2
|
||||
from quapy.method.aggregative import PACC, EMQ, KDEyML
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.protocol import UPP
|
||||
|
@ -32,7 +32,7 @@ def wrap_hyper(classifier_hyper_grid: dict):
|
|||
|
||||
|
||||
METHODS = [
|
||||
('PACC', PACC(newLR()), wrap_hyper(logreg_grid)),
|
||||
# ('PACC', PACC(newLR()), wrap_hyper(logreg_grid)),
|
||||
('EMQ', EMQ(newLR()), wrap_hyper(logreg_grid)),
|
||||
('KDEy', KDEyML(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}),
|
||||
# ('KDEy-MLred', KDEyMLred(newLR()), {**wrap_hyper(logreg_grid), **{'bandwidth': np.logspace(-4, np.log10(0.2), 20)}}),
|
||||
|
@ -55,9 +55,9 @@ TKDEyML4 es como ML2 pero max 5 iteraciones por optimización
|
|||
"""
|
||||
TRANSDUCTIVE_METHODS = [
|
||||
#('TKDEy-ML', KDEyMLauto(newLR()), None),
|
||||
# ('TKDEy-MLboth', KDEyMLauto(newLR(), optim='both'), None),
|
||||
# ('TKDEy-MLbothfine', KDEyMLauto(newLR(), optim='both_fine'), None),
|
||||
# ('TKDEy-ML2', KDEyMLauto(newLR(), optim='two_steps'), None),
|
||||
('TKDEy-both', KDEyMLauto(newLR(), optim='both'), None),
|
||||
('TKDEy-bothfine', KDEyMLauto(newLR(), optim='both_fine'), None),
|
||||
('TKDEy-two', KDEyMLauto(newLR(), optim='two_steps'), None),
|
||||
# ('TKDEy-MLike', KDEyMLauto(newLR(), optim='max_likelihood'), None),
|
||||
# ('TKDEy-MLike2', KDEyMLauto(newLR(), optim='max_likelihood2'), None),
|
||||
#('TKDEy-ML3', KDEyMLauto(newLR()), None),
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit c223c9f1fe3c9708e8c5a5c56e438cdaaa857be4
|
||||
Subproject commit 52547b253e906b8ae8d5ae3df77dafe72fac6902
|
Loading…
Reference in New Issue