switching

This commit is contained in:
Alejandro Moreo Fernandez 2024-09-16 15:06:29 +02:00
parent af2c4eaf01
commit ede214aa54
2 changed files with 91 additions and 13 deletions

View File

@ -1,30 +1,108 @@
import numpy as np import numpy as np
from sklearn.linear_model import LogisticRegression
from os.path import join
import quapy as qp import quapy as qp
from protocol import UPP from quapy.protocol import UPP
from quapy.method.aggregative import KDEyML from quapy.method.aggregative import KDEyML
# Experiment configuration.
# DEBUG switches to a cheaper setting: smaller samples, fewer protocol
# repetitions, a coarser bandwidth grid, and an explicit default classifier.
DEBUG = True

qp.environ["SAMPLE_SIZE"] = 100 if DEBUG else 500
val_repeats = 100 if DEBUG else 500
test_repeats = 100 if DEBUG else 500
if DEBUG:
    qp.environ["DEFAULT_CLS"] = LogisticRegression()

# Filled in by the driver loop: dataset name -> test curve / selected bandwidth.
test_results = {}
val_choice = {}

# Candidate bandwidths in [0.01, 0.20]; 10 grid points in DEBUG, 20 otherwise.
bandwidth_range = np.linspace(0.01, 0.20, 10 if DEBUG else 20)
def datasets():
    """Yield the datasets used in the experiment, one at a time.

    Only the first four entries of ``qp.datasets.UCI_MULTICLASS_DATASETS``
    are fetched. When ``DEBUG`` is set, each dataset is passed through
    ``dataset.reduce(random_state=0)`` before being yielded (presumably to
    shrink it for faster runs -- confirm against the QuaPy API).
    """
    for dataset_name in qp.datasets.UCI_MULTICLASS_DATASETS[:4]:
        dataset = qp.datasets.fetch_UCIMulticlassDataset(dataset_name)
        if DEBUG:
            dataset = dataset.reduce(random_state=0)
        yield dataset
def experiment_dataset(dataset):
    """Compare the bandwidth picked by model selection with the test curve.

    First, a grid search over ``bandwidth_range`` is run for ``KDEyML`` on a
    stratified split of the training set. Then one ``KDEyML`` per candidate
    bandwidth is fitted on the full training set and evaluated on the test
    protocol with the ``'mae'`` error metric.

    :param dataset: object exposing ``train_test`` and ``name``
    :return: tuple ``(modsel_choice, dataset_results)`` where ``modsel_choice``
        is the selected bandwidth as a float and ``dataset_results`` is a list
        of ``(bandwidth, mae)`` float pairs, one per value in
        ``bandwidth_range``
    """
    train, test = dataset.train_test
    test_gen = UPP(test, repeats=test_repeats)

    # bandwidth chosen during model selection in validation
    train_tr, train_va = train.split_stratified(random_state=0)
    kdey = KDEyML(random_state=0)
    modsel = qp.model_selection.GridSearchQ(
        model=kdey,
        param_grid={'bandwidth': bandwidth_range},
        protocol=UPP(train_va, repeats=val_repeats),
        refit=False,  # only best_params_ is read below
        n_jobs=-1
    ).fit(train_tr)
    chosen_bandwidth = modsel.best_params_['bandwidth']
    modsel_choice = float(chosen_bandwidth)

    # results in test
    print(f"testing KDEy in {dataset.name}")
    dataset_results = []
    for b in bandwidth_range:
        kdey = KDEyML(bandwidth=b, random_state=0)
        kdey.fit(train)
        mae = qp.evaluation.evaluate(kdey, protocol=test_gen, error_metric='mae', verbose=True)
        print(f'bandwidth={b}: {mae:.5f}')
        # plain floats so the stored results do not hold numpy scalar types
        dataset_results.append((float(b), float(mae)))

    return modsel_choice, dataset_results
def plot_bandwidth(val_choice, test_results):
    """Show one figure per dataset: test error against bandwidth.

    Each figure plots the ``(bandwidth, result)`` pairs stored in
    ``test_results`` and marks the bandwidth stored in ``val_choice`` with a
    dashed red vertical line.

    :param val_choice: dict mapping dataset name -> chosen bandwidth
    :param test_results: dict mapping dataset name -> sequence of
        ``(bandwidth, result)`` pairs; must contain every key of
        ``val_choice``
    :return: None; figures are displayed with ``plt.show()``
    """
    if not val_choice:
        # nothing to draw, so matplotlib is never imported
        return

    # imported lazily so the module loads without matplotlib when nothing is plotted
    import matplotlib.pyplot as plt

    for dataset_name, bandwidth_chosen in val_choice.items():
        bandwidths, results = zip(*test_results[dataset_name])

        # create the figure
        plt.figure(figsize=(8, 6))

        # plot the data points
        plt.plot(bandwidths, results, marker='o')

        # vertical line at the bandwidth chosen by model selection
        plt.axvline(x=bandwidth_chosen, color='r', linestyle='--', label=f'Bandwidth elegido: {bandwidth_chosen}')

        # axis labels and title
        plt.xlabel('Bandwidth')
        plt.ylabel('Resultado')
        plt.title('Gráfica de Bandwidth vs Resultado')

        # show the legend
        plt.legend()

        # show the plot
        plt.grid(True)
        plt.show()
# Driver: run the experiment for every dataset and record its results.
# Results are obtained through qp.util.pickled_resource, which receives the
# result path, the experiment function and its argument (presumably it
# reuses an existing pickle at that path -- confirm against the QuaPy API).
for dataset in datasets():
    # DEBUG runs are stored apart from the full runs
    results_dir = './results/debug' if DEBUG else './results'
    result_path = f'{results_dir}/{dataset.name}.pkl'

    modsel_choice, dataset_results = qp.util.pickled_resource(result_path, experiment_dataset, dataset)
    val_choice[dataset.name] = modsel_choice
    test_results[dataset.name] = dataset_results

    print(f'Dataset = {dataset.name}')
    print(modsel_choice)
    print(dataset_results)

# NOTE(review): the original indentation was lost, so it is not certain whether
# this call sat inside or after the loop; it is placed after the loop because
# plot_bandwidth already iterates over every collected dataset.
plot_bandwidth(val_choice, test_results)

View File

@ -502,7 +502,7 @@ class Dataset:
return len(self.vocabulary) return len(self.vocabulary)
@property @property
def train_test(self): def train_test(self) -> (LabelledCollection, LabelledCollection):
""" """
Alias to `self.training` and `self.test` Alias to `self.training` and `self.test`