switch
This commit is contained in:
parent
6ce5eea4f2
commit
9fb208fe4c
|
@ -1,16 +1,16 @@
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
|
import shutil
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
from os.path import join
|
from os.path import join
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from quapy.protocol import UPP
|
from quapy.protocol import UPP
|
||||||
from kdey_devel import KDEyML
|
from kdey_devel import KDEyML
|
||||||
|
from utils import measuretime
|
||||||
|
|
||||||
|
|
||||||
|
DEBUG = True
|
||||||
DEBUG = False
|
|
||||||
|
|
||||||
qp.environ["SAMPLE_SIZE"] = 100 if DEBUG else 500
|
qp.environ["SAMPLE_SIZE"] = 100 if DEBUG else 500
|
||||||
val_repeats = 100 if DEBUG else 500
|
val_repeats = 100 if DEBUG else 500
|
||||||
|
@ -23,20 +23,24 @@ val_choice = {}
|
||||||
|
|
||||||
bandwidth_range = np.linspace(0.01, 0.20, 20)
|
bandwidth_range = np.linspace(0.01, 0.20, 20)
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
bandwidth_range = np.linspace(0.01, 0.20, 10)
|
bandwidth_range = np.linspace(0.01, 0.20, 5)
|
||||||
|
|
||||||
|
|
||||||
def datasets():
    """Yield the UCI multiclass datasets one at a time.

    When the module-level ``DEBUG`` flag is set, only the first four dataset
    names are fetched and every fetched dataset is passed through
    ``reduce(random_state=0)`` before being yielded.
    """
    all_names = qp.datasets.UCI_MULTICLASS_DATASETS
    selected_names = all_names[:4] if DEBUG else all_names

    for name in selected_names:
        fetched = qp.datasets.fetch_UCIMulticlassDataset(name)
        yield fetched.reduce(random_state=0) if DEBUG else fetched
|
||||||
|
|
||||||
|
|
||||||
def predict_b_modsel(train):
|
@measuretime
|
||||||
tinit = 0
|
def predict_b_modsel(dataset):
|
||||||
# bandwidth chosen during model selection in validation
|
# bandwidth chosen during model selection in validation
|
||||||
|
train = dataset.training
|
||||||
train_tr, train_va = train.split_stratified(random_state=0)
|
train_tr, train_va = train.split_stratified(random_state=0)
|
||||||
kdey = KDEyML(random_state=0)
|
kdey = KDEyML(random_state=0)
|
||||||
modsel = qp.model_selection.GridSearchQ(
|
modsel = qp.model_selection.GridSearchQ(
|
||||||
|
@ -49,74 +53,73 @@ def predict_b_modsel(train):
|
||||||
).fit(train_tr)
|
).fit(train_tr)
|
||||||
chosen_bandwidth = modsel.best_params_['bandwidth']
|
chosen_bandwidth = modsel.best_params_['bandwidth']
|
||||||
modsel_choice = float(chosen_bandwidth)
|
modsel_choice = float(chosen_bandwidth)
|
||||||
tend =
|
# kdey.set_params(bandwidth=chosen_bandwidth)
|
||||||
|
# kdey.fit(train)
|
||||||
|
# kdey.qua
|
||||||
return modsel_choice
|
return modsel_choice
|
||||||
|
|
||||||
def in_test_search(dataset, n_jobs=-1):
    """Score KDEyML on the test split for every candidate bandwidth.

    One KDEyML quantifier is fitted on the training split for each value in
    the module-level ``bandwidth_range`` and evaluated (MAE) on samples drawn
    from the test split with the UPP protocol. The per-bandwidth jobs are
    dispatched through ``qp.util.parallel``.

    :param dataset: object exposing ``train_test`` (a train/test pair) and
        ``name``.
    :param n_jobs: forwarded to ``qp.util.parallel``.
    :return: ``(dataset_results, bandwidth_range)`` where ``dataset_results``
        holds one MAE per bandwidth, in the order of ``bandwidth_range``.
    """
    training, testing = dataset.train_test

    print(f"testing KDEy in {dataset.name}")

    def experiment_job(bandwidth):
        # NOTE: the parameter must stay named `bandwidth`; the f-string below
        # prints the variable name via the `{bandwidth=}` specifier.
        quantifier = KDEyML(bandwidth=bandwidth, random_state=0)
        quantifier.fit(training)
        # `test_repeats` is a module-level setting defined outside this view.
        protocol = UPP(testing, repeats=test_repeats)
        score = qp.evaluation.evaluate(
            quantifier, protocol=protocol, error_metric='mae', verbose=True
        )
        print(f'{bandwidth=}: {score:.5f}')
        return float(score)

    per_bandwidth_mae = qp.util.parallel(
        experiment_job, bandwidth_range, n_jobs=n_jobs
    )
    return per_bandwidth_mae, bandwidth_range
|
||||||
|
|
||||||
def plot_bandwidth(dataset_name, test_results, bandwidths, triplet_list_results):
    """Plot test MAE against bandwidth and mark each method's chosen bandwidth.

    The figure is written to ``./plots/<dataset_name>.png`` (or to
    ``./plots_debug/`` when the module-level ``DEBUG`` flag is set); the
    target directory is created if needed.

    :param dataset_name: used as the plot title and the output file name.
    :param test_results: MAE values, one per entry of ``bandwidths``.
    :param bandwidths: the bandwidth values plotted on the x axis.
    :param triplet_list_results: iterable of
        ``(method_name, method_choice, method_time)``; a dashed vertical line
        is drawn at each ``method_choice`` and labelled with ``method_name``.
    """
    import matplotlib.pyplot as plt

    print("PLOT", dataset_name)
    print(dataset_name)

    plt.figure(figsize=(8, 6))

    # show test results (drawn with the first colour of the cycle, "C0")
    plt.plot(bandwidths, test_results, marker='o')

    # Give each method its own colour from the property cycle, starting at
    # "C1", so the markers are distinguishable from the MAE curve and from
    # one another in the legend.
    for idx, (method_name, method_choice, _) in enumerate(triplet_list_results, start=1):
        plt.axvline(
            x=method_choice, linestyle='--', color=f'C{idx}', label=method_name
        )

    # Axis labels and title
    plt.xlabel('Bandwidth')
    plt.ylabel('MAE')
    plt.title(dataset_name)

    # Legend placed outside the axes, to the right
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.grid(True)

    plotdir = './plots_debug' if DEBUG else './plots'
    os.makedirs(plotdir, exist_ok=True)
    plt.tight_layout()
    plt.savefig(f'{plotdir}/{dataset_name}.png')
    plt.close()
|
||||||
|
|
||||||
|
def error_table(dataset_name, test_results, bandwidth_range, triplet_list_results):
    """Print a tab-separated table of how far each method is from the best score.

    For every ``(method_name, method_choice, took)`` triplet, the test score
    obtained at the chosen bandwidth is compared with the best (lowest) test
    score; the absolute difference is reported in the ``AE`` column.

    :param dataset_name: accepted for signature parity with the other
        reporting helpers; not used in the table itself.
    :param test_results: test scores, one per entry of ``bandwidth_range``.
    :param bandwidth_range: the bandwidth values that were evaluated.
    :param triplet_list_results: iterable of
        ``(method_name, method_choice, took)`` with ``took`` in seconds.
    """
    scores = np.asarray(test_results, dtype=float)
    bandwidths = np.asarray(bandwidth_range, dtype=float)

    # The reference is the best *score*; comparing a score against the best
    # bandwidth value (as before) mixes two unrelated quantities.
    best_score = float(scores.min())

    print('Method\tChoice\tAE\tTime')
    for method_name, method_choice, took in triplet_list_results:
        # Tolerant lookup: the choice may have gone through float conversion
        # or pickling, so do not rely on exact floating-point equality.
        matches = np.flatnonzero(np.isclose(bandwidths, method_choice))
        if matches.size:
            method_score = float(scores[matches[0]])
        else:
            # Choice lies outside the evaluated grid: fall back to a score of 1.
            method_score = 1.0
        error = abs(best_score - method_score)
        print(f'{method_name}\t{method_choice}\t{error}\t{took:.3}s')
|
||||||
|
|
||||||
|
|
||||||
for dataset in datasets():
|
for dataset in datasets():
|
||||||
|
@ -124,20 +127,25 @@ for dataset in datasets():
|
||||||
print(len(dataset.training))
|
print(len(dataset.training))
|
||||||
print(len(dataset.test))
|
print(len(dataset.test))
|
||||||
|
|
||||||
|
result_path = f'./results/{dataset.name}/'
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
result_path = f'./results/debug/{dataset.name}.pkl'
|
result_path = result_path.replace('results', 'results_debug')
|
||||||
else:
|
if os.path.exists(result_path):
|
||||||
result_path = f'./results/{dataset.name}.pkl'
|
shutil.rmtree(result_path)
|
||||||
|
|
||||||
modsel_choice, dataset_results = qp.util.pickled_resource(result_path, experiment_dataset, dataset)
|
dataset_results, bandwidth_range = qp.util.pickled_resource(join(result_path, 'test.pkl'), in_test_search, dataset)
|
||||||
val_choice[dataset.name] = modsel_choice
|
|
||||||
test_results[dataset.name] = dataset_results
|
triplet_list_results = []
|
||||||
|
modsel_choice, modsel_time = qp.util.pickled_resource(join(result_path, 'modsel.pkl'), predict_b_modsel, dataset)
|
||||||
|
triplet_list_results.append(('modsel', modsel_choice, modsel_time,))
|
||||||
|
|
||||||
print(f'Dataset = {dataset.name}')
|
print(f'Dataset = {dataset.name}')
|
||||||
print(modsel_choice)
|
print(modsel_choice)
|
||||||
print(dataset_results)
|
print(dataset_results)
|
||||||
|
|
||||||
plot_bandwidth(val_choice, test_results)
|
plot_bandwidth(dataset.name, dataset_results, bandwidth_range, triplet_list_results)
|
||||||
|
error_table(dataset.name, dataset_results, bandwidth_range, triplet_list_results)
|
||||||
|
# time_table(dataset.name, dataset_results, bandwidth_range, triplet_list_results)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,12 @@ from functools import wraps
|
||||||
def measuretime(func):
    """Decorator that appends the elapsed time (in seconds) to the result.

    If the wrapped function returns a tuple, the elapsed time is appended to
    that tuple, so ``(a, b)`` becomes ``(a, b, seconds)``. Any other return
    value ``r`` becomes the pair ``(r, seconds)``.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution; time.time() follows
        # the wall clock and can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        time_it_took = time.perf_counter() - start_time
        if isinstance(result, tuple):
            return (*result, time_it_took)
        return result, time_it_took
    return wrapper
|
Loading…
Reference in New Issue