with torch regressor

This commit is contained in:
Alejandro Moreo Fernandez 2024-09-27 16:22:18 +02:00
parent 04c1f286ce
commit f01d91b699
4 changed files with 199 additions and 12 deletions

18
LocalStack/_neural.py Normal file
View File

@ -0,0 +1,18 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class DistributionRegressor(nn.Module):
    """
    Two-layer perceptron that maps a vector of ``n_classes`` values to a
    probability distribution over the same ``n_classes`` classes.

    :param n_classes: size of both the input and the output vectors
    :param hidden_dim: number of units in the hidden layer
    """

    def __init__(self, n_classes, hidden_dim=64):
        super().__init__()
        # input -> hidden projection, followed by hidden -> output projection
        self.fc1 = nn.Linear(n_classes, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, n_classes)

    def forward(self, x):
        """
        Apply linear -> ReLU -> linear -> softmax to `x`.

        :param x: tensor whose last dimension has size ``n_classes``
        :return: tensor of the same shape; the softmax is taken over the last
            dimension, so values along it are non-negative and sum to 1
        """
        hidden = F.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return F.softmax(logits, dim=-1)

View File

@ -3,8 +3,7 @@ from time import time
import numpy as np import numpy as np
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
import quapy as qp import quapy as qp
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred from LocalStack.method import *
from LocalStack.method import LocalStackingQuantification, LocalStackingQuantification2
from quapy.method.aggregative import PACC, EMQ, KDEyML from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP from quapy.protocol import UPP
@ -21,8 +20,9 @@ METHODS = [
] ]
TRANSDUCTIVE_METHODS = [ TRANSDUCTIVE_METHODS = [
('LSQ', LocalStackingQuantification(EMQ()), {}), # ('LSQ', LocalStackingQuantification(EMQ()), {}),
('LSQ2', LocalStackingQuantification2(EMQ()), {}) # ('LSQ2', LocalStackingQuantification2(EMQ()), {}),
('LSQ-torch', LocalStackingQuantification3(EMQ()), {})
] ]
def show_results(result_path): def show_results(result_path):

View File

@ -1,23 +1,26 @@
import numpy as np import numpy as np
import torch
import quapy as qp import quapy as qp
from sklearn.multioutput import MultiOutputRegressor from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR from sklearn.svm import SVR
from LocalStack._neural import DistributionRegressor
from data import LabelledCollection from data import LabelledCollection
from quapy.method.base import BaseQuantifier from quapy.method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeSoftQuantifier from quapy.method.aggregative import AggregativeSoftQuantifier
from tqdm import tqdm
class LocalStackingQuantification(BaseQuantifier): class LocalStackingQuantification(BaseQuantifier):
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None): def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae'):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \ assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}' f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure) self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection): def fit(self, data: LabelledCollection):
train, val = data.split_stratified() train, val = data.split_stratified()
@ -38,7 +41,7 @@ class LocalStackingQuantification(BaseQuantifier):
samples_pred_prevs = [] samples_pred_prevs = []
samples_distance = [] samples_distance = []
for i in range(self.n_samples_gen): for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state) sample_i = self.val_data.sampling(test_size, *pred_prevs)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X) pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
err_dist = self.comparison_measure(pred_prevs, pred_prev_sample_i) err_dist = self.comparison_measure(pred_prevs, pred_prev_sample_i)
@ -50,7 +53,7 @@ class LocalStackingQuantification(BaseQuantifier):
samples_sel = np.asarray(samples)[ord_distances][:self.n_samples_sel] samples_sel = np.asarray(samples)[ord_distances][:self.n_samples_sel]
samples_pred_prevs_sel = np.asarray(samples_pred_prevs)[ord_distances][:self.n_samples_sel] samples_pred_prevs_sel = np.asarray(samples_pred_prevs)[ord_distances][:self.n_samples_sel]
reg = MultiOutputRegressor(SVR()) reg = MultiOutputRegressor(SVR(C=1000))
reg_X = samples_pred_prevs_sel reg_X = samples_pred_prevs_sel
reg_y = [s.prevalence() for s in samples_sel] reg_y = [s.prevalence() for s in samples_sel]
reg.fit(reg_X, reg_y) reg.fit(reg_X, reg_y)
@ -69,14 +72,13 @@ class LocalStackingQuantification2(BaseQuantifier):
predica en test, saca directamente samples de training con la prevalencia predicha en test predica en test, saca directamente samples de training con la prevalencia predicha en test
""" """
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None): def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae'):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \ assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}' f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure) self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection): def fit(self, data: LabelledCollection):
train, val = data.split_stratified() train, val = data.split_stratified()
@ -96,7 +98,7 @@ class LocalStackingQuantification2(BaseQuantifier):
samples = [] samples = []
samples_pred_prevs = [] samples_pred_prevs = []
for i in range(self.n_samples_gen): for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state) sample_i = self.val_data.sampling(test_size, *pred_prevs)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X) pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
samples.append(sample_i) samples.append(sample_i)
samples_pred_prevs.append(pred_prev_sample_i) samples_pred_prevs.append(pred_prev_sample_i)
@ -109,4 +111,96 @@ class LocalStackingQuantification2(BaseQuantifier):
corrected_prev = reg.predict([pred_prevs])[0] corrected_prev = reg.predict([pred_prevs])[0]
corrected_prev = self.normalize(corrected_prev) corrected_prev = self.normalize(corrected_prev)
return corrected_prev return corrected_prev
class LocalStackingQuantification3(BaseQuantifier):
    """
    Local-stacking quantifier whose corrector is a neural network.

    At prediction time the surrogate quantifier first estimates the prevalence of
    the test instances. A fresh :class:`DistributionRegressor` is then trained on
    batches of validation samples requested at that estimated prevalence, learning
    to map the surrogate's estimate on each sample to the sample's true prevalence.
    Finally the regressor is applied to the surrogate's estimate for the test
    instances.

    :param surrogate_quantifier: an instance of `AggregativeSoftQuantifier`
    :param batch_size: number of validation samples generated per training step
    :param target: error measure to optimise; only ``'ae'`` (absolute error,
        implemented with an L1 loss) is supported
    :param n_epochs: maximum number of training steps run by `quantify`
    :param lr: learning rate of the Adam optimizer
    :param patience: number of consecutive non-improving steps tolerated before
        training is stopped early
    :raises AssertionError: if `surrogate_quantifier` is not an `AggregativeSoftQuantifier`
    :raises NotImplementedError: if `target` is not ``'ae'``
    """

    def __init__(self, surrogate_quantifier, batch_size=100, target='ae', n_epochs=500, lr=0.01, patience=10):
        # `__name__` (not `__class__.__name__`, which names the metaclass) so that the
        # message actually reports the required type
        assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
            f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__name__}'
        self.surrogate_quantifier = surrogate_quantifier
        self.batch_size = batch_size
        self.target = target
        self.n_epochs = n_epochs
        self.lr = lr
        self.patience = patience
        if target not in ['ae']:
            raise NotImplementedError('only AE supported')

    def fit(self, data: LabelledCollection):
        """
        Fit the surrogate quantifier on one stratified part of `data` and keep the
        other part as the pool from which `gen_batch` draws samples.

        :param data: the labelled training collection
        :return: self
        """
        train, val = data.split_stratified()
        self.surrogate_quantifier.fit(train)
        self.val_data = val
        return self

    def gen_batch(self, test_size, pred_prevs):
        """
        Generate one training batch for the regressor.

        Requests `batch_size` samples of `test_size` instances from the validation
        data at prevalence `pred_prevs`, and quantifies each one with the surrogate.

        :param test_size: number of instances in each generated sample
        :param pred_prevs: prevalence values passed to the sampling routine
        :return: a tuple ``(true_prevs, pred_prevs)`` of float tensors with one row
            per sample: the actual prevalence of each sample and the surrogate's
            estimate for it
        """
        samples_true_prevs = []
        samples_pred_prevs = []
        for _ in range(self.batch_size):
            sample_i = self.val_data.sampling(test_size, *pred_prevs)
            pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
            samples_true_prevs.append(sample_i.prevalence())
            samples_pred_prevs.append(pred_prev_sample_i)

        samples_pred_prevs = torch.from_numpy(np.asarray(samples_pred_prevs)).float()
        samples_true_prevs = torch.from_numpy(np.asarray(samples_true_prevs)).float()
        return samples_true_prevs, samples_pred_prevs

    def quantify(self, instances: np.ndarray):
        """
        Estimate the class prevalence of `instances`.

        A new regressor is trained from scratch on every call, using batches built
        around the surrogate's estimate for `instances`.

        :param instances: the test instances; ``instances.shape[0]`` is used as the
            size of the generated samples
        :return: numpy array with the corrected prevalence estimate
        :raises AssertionError: if called before `fit`
        """
        assert hasattr(self, 'val_data'), 'quantify called before fit'
        pred_prevs = self.surrogate_quantifier.quantify(instances)
        test_size = instances.shape[0]
        n_classes = len(pred_prevs)

        reg = DistributionRegressor(n_classes)
        optimizer = torch.optim.Adam(reg.parameters(), lr=self.lr)
        loss_fn = torch.nn.L1Loss()

        reg.train()
        best_loss = None
        patience = self.patience
        pbar = tqdm(range(self.n_epochs), total=self.n_epochs)
        for _ in pbar:
            true_prev, pred_prev = self.gen_batch(test_size, pred_prevs)
            pred_prev_hat = reg(pred_prev)
            loss = loss_fn(pred_prev_hat, true_prev)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_val = loss.item()
            pbar.set_description(f'loss={loss_val:.5f}')

            # early stop; note that the monitored value is the training loss of the
            # current, freshly generated batch (there is no separate held-out loss)
            if best_loss is None or loss_val < best_loss:
                best_loss = loss_val
                patience = self.patience
            else:
                patience -= 1
                if patience <= 0:
                    print('\tearly stop!')
                    break

        reg.eval()
        with torch.no_grad():
            target_prev = torch.from_numpy(np.asarray(pred_prevs)).float()
            corrected_prev = reg(target_prev)
            corrected_prev = corrected_prev.detach().numpy()
        return corrected_prev

View File

@ -0,0 +1,75 @@
import os
from time import time
import numpy as np
from sklearn.linear_model import LogisticRegression
import quapy as qp
from LocalStack.method import *
from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from pathlib import Path
# Seed constant of the experiment script.
SEED = 1

# Baseline quantifiers, as (method name, quantifier instance, parameter grid) triples.
METHODS = [
    ('PACC', PACC(), {}),
    ('EMQ', EMQ(), {}),
    ('KDEy-ML', KDEyML(), {}),
]

# Local-stacking variants, each one wrapping EMQ as its surrogate quantifier;
# same (method name, quantifier instance, parameter grid) layout as above.
TRANSDUCTIVE_METHODS = [
    ('LSQ', LocalStackingQuantification(EMQ()), {}),
    ('LSQ2', LocalStackingQuantification2(EMQ()), {}),
    ('LSQ-torch', LocalStackingQuantification3(EMQ()), {}),
]
def show_results(result_path):
    """
    Print, for the MAE and MRAE columns of a results file, a pivot table with one
    row per dataset and one column per method (plus the 'All' margins).

    :param result_path: path of the tab-separated results file, without the
        '.csv' extension (it is appended here)
    """
    import pandas as pd

    results = pd.read_csv(result_path + '.csv', sep='\t')

    # Lift pandas' display limits so that the printed tables are not truncated.
    display_options = (
        ('display.max_columns', None),
        ('display.max_rows', None),
        ('display.width', 1000),  # maximum line width
    )
    for option, value in display_options:
        pd.set_option(option, value)

    # Only MAE and MRAE are shown; the KLD, TR-TIME and TE-TIME columns of the
    # file could be tabulated in the same way by adding them to this tuple.
    for metric in ('MAE', 'MRAE'):
        table = results.pivot_table(index='Dataset', columns="Method", values=[metric], margins=True)
        print(table)
if __name__ == '__main__':
    # Global quapy settings and protocol sizes for this script.
    qp.environ['SAMPLE_SIZE'] = 500
    qp.environ['N_JOBS'] = -1
    n_bags_val = 25
    n_bags_test = 100

    result_dir = 'results_quantification/localstack'
    os.makedirs(result_dir, exist_ok=True)

    global_result_path = f'{result_dir}/allmethods'
    summary_csv = global_result_path + '.csv'

    # (Re)create the summary file with only the header row.
    with open(summary_csv, 'wt') as out:
        out.write('Method\tDataset\tMAE\tMRAE\tKLD\tTR-TIME\tTE-TIME\n')

    # Per-(method, dataset) reports live next to the summary file.
    reports_dir = Path(global_result_path).parent

    for method_name, quantifier, param_grid in METHODS + TRANSDUCTIVE_METHODS:
        with open(summary_csv, 'at') as out:
            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
                local_result_path = os.path.join(reports_dir, f'{method_name}_{dataset}.dataframe')

                # NOTE(review): a row is written only when the report file exists;
                # combinations without a report are skipped — confirm this is intended.
                if not os.path.exists(local_result_path):
                    continue

                report = qp.util.load_report(local_result_path)
                means = report.mean(numeric_only=True)
                row = (
                    f'{method_name}\t{dataset}\t'
                    f'{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\t'
                    f'{means["tr_time"]:.3f}\t{means["te_time"]:.3f}\n'
                )
                out.write(row)
                out.flush()

    show_results(global_result_path)