Compare commits

...

20 Commits

Author SHA1 Message Date
Alejandro Moreo Fernandez 9aecdad66f improving plots debug 2024-10-16 17:44:59 +02:00
Alejandro Moreo Fernandez cdf0200430 switchign 2024-09-27 16:25:05 +02:00
Alejandro Moreo Fernandez 3686e820fe Merge branch 'kdey2' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into kdey2 2024-09-27 10:20:26 +02:00
Alejandro Moreo Fernandez 14ff3c9884 switching to kde 2024-09-27 10:18:20 +02:00
Alejandro Moreo Fernandez 641228bf62 Merge branch 'localstack' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into localstack 2024-09-26 16:06:01 +02:00
Alejandro Moreo Fernandez 04c1f286ce local stack 2024-09-26 16:05:03 +02:00
Alejandro Moreo Fernandez a271fe1231 Merge pull request #42 from mirkobunse/devel
Fix PyPI: replace the direct extra dependency quapy[composable] with documentation on how to install through git
2024-09-17 10:57:10 +02:00
Mirko Bunse 5e2fc07fc5 Merge remote-tracking branch 'fork-origin/master' into devel 2024-09-17 10:49:39 +02:00
Mirko Bunse 73755b73e8 Merge remote-tracking branch 'fork-origin/devel' into devel 2024-09-17 10:49:32 +02:00
Mirko Bunse db8a870495 Instruct the user how to install qunfold in the case of an unsuccessful import 2024-09-17 10:48:53 +02:00
Alejandro Moreo Fernandez b485205c7c cleaning dir KDEy 2024-09-17 10:39:39 +02:00
Mirko Bunse 9be729386a Fix PyPI: replace the direct extra dependency quapy[composable] with documentation on how to install through git 2024-09-17 10:19:26 +02:00
Alejandro Moreo Fernandez ffcfd64957 Merge branch 'mirkobunse-devel' into devel 2024-09-17 10:13:55 +02:00
Alejandro Moreo Fernandez 1f1757f0ee Merge branch 'devel' of github.com:mirkobunse/QuaPy into mirkobunse-devel 2024-09-17 10:12:17 +02:00
Alejandro Moreo Fernandez cea96e87c6 added path to sys.path in config 2024-09-16 15:30:34 +02:00
Alejandro Moreo Fernandez 584a4d07d4 removing pylint 2024-09-16 15:07:19 +02:00
Mirko Bunse 3895cba610 Revert "TO REVERT: build gh-pages even on pushes to devel"
This reverts commit de3f8fd300.
2024-09-16 13:56:00 +02:00
Mirko Bunse de3f8fd300 TO REVERT: build gh-pages even on pushes to devel 2024-09-16 13:48:11 +02:00
Mirko Bunse 2311bb6649 CI: replace ammaraskar/sphinx-action with custom run commands 2024-09-16 13:44:35 +02:00
Alejandro Moreo Fernandez 55c62a9dd2 adding name to datasets un fetch_UCIMulticlassDataset 2024-09-16 13:22:36 +02:00
12 changed files with 386 additions and 85 deletions

View File

@@ -20,15 +20,16 @@ jobs:
     env:
       QUAPY_TESTS_OMIT_LARGE_DATASETS: True
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip setuptools wheel
-          python -m pip install -e .[bayes,composable,tests]
+          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
+          python -m pip install -e .[bayes,tests]
       - name: Test with unittest
         run: python -m unittest
@@ -38,15 +39,18 @@ jobs:
     runs-on: ubuntu-latest
     if: github.ref == 'refs/heads/master'
     steps:
-      - uses: actions/checkout@v1
-      - name: Build documentation
-        uses: ammaraskar/sphinx-action@master
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
         with:
-          pre-build-command: |
-            apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version
+          python-version: 3.11
+      - name: Install dependencies
+        run: |
           python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
-          python -m pip install -e .[composable,neural,docs]
-          docs-folder: "docs/"
+          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
+          python -m pip install -e .[neural,docs]
+      - name: Build documentation
+        run: sphinx-build -M html docs/source docs/build
       - name: Publish documentation
         run: |
           git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/

View File

@@ -1,23 +0,0 @@
-name: Pylint
-
-on: [push]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.8", "3.9", "3.10"]
-    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v3
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint
-    - name: Analysing the code with pylint
-      run: |
-        pylint $(git ls-files '*.py')

View File

@@ -326,7 +326,7 @@ class KDEyMLauto2(KDEyML):
         if self.target == 'likelihood':
             loss_fn = neg_loglikelihood_prev
         else:
-            loss_fn = lambda prev_hat: qp.error.from_name(self.target)(prev, prev_hat)
+            loss_fn = lambda prev_hat: qp.error.from_name(self.target)(prevtrue, prev_hat)
         pred_prev, neglikelihood = optim_minimize(loss_fn, init_prev, return_loss=True)
         loss_accum += neglikelihood
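For context on the patched line: `qp.error.from_name` resolves a metric identifier to the corresponding function in `quapy.error`. A minimal sketch of that behavior, with made-up prevalence vectors for illustration:

```python
import numpy as np
import quapy as qp

# from_name('mae') returns the callable quapy.error.mae
mae = qp.error.from_name('mae')

prev_true = np.asarray([0.5, 0.3, 0.2])  # illustrative true prevalence
prev_hat = np.asarray([0.4, 0.4, 0.2])   # illustrative predicted prevalence
print(mae(prev_true, prev_hat))          # mean absolute error between the two vectors
```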

View File

@@ -43,7 +43,7 @@ METHODS = [
     ('KDEy-AE', KDEyMLauto2(newLR(), bandwidth='auto', target='mae', search='grid'), wrap_hyper(logreg_grid)),
     ('KDEy-AE+', KDEyMLauto2(newLR(), bandwidth='auto', target='mae', search='optim'), wrap_hyper(logreg_grid)),
     ('KDEy-RAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='grid'), wrap_hyper(logreg_grid)),
-    ('KDEy-RAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='optim'), wrap_hyper(logreg_grid)),
+    ('KDEy-RAE+', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='optim'), wrap_hyper(logreg_grid)),
 ]

View File

@@ -28,7 +28,7 @@ def plot(xaxis, metrics_measurements, metrics_names, suffix):
     fig, ax1 = plt.subplots(figsize=(8, 6))

     def add_plot(ax, mean_error, std_error, name, color, marker):
-        ax.plot(xaxis, mean_error, label=name, marker=marker, color=color)
+        ax.plot(xaxis, mean_error, label=name, marker=marker, color=color, markersize=3)
         if std_error is not None:
             ax.fill_between(xaxis, mean_error - std_error, mean_error + std_error, color=color, alpha=0.2)
@@ -74,6 +74,56 @@ def plot(xaxis, metrics_measurements, metrics_names, suffix):
     plt.close()

+def plot_stack(xaxis, metrics_measurements, metrics_names, suffix):
+    # create the figure and the axes (4 vertically stacked panels)
+    fig, axs = plt.subplots(4, 1, figsize=(8, 12))
+    x = xaxis
+    indexes = np.arange(len(metrics_measurements))
+    axs_idx = 0
+    # colors = ['b', 'g', 'r', 'c', 'purple']
+    for m_te, m_tr in zip(indexes[:-1:2], indexes[1::2]):
+        metric_te, metric_tr = metrics_measurements[m_te], metrics_measurements[m_tr]
+        metric_te_name, metric_tr_name = metrics_names[m_te], metrics_names[m_tr]
+        metric_mean_tr = np.mean(metric_tr, axis=0)
+        metric_std_tr = np.std(metric_tr, axis=0)
+        metric_mean_te = np.mean(metric_te, axis=0)
+        metric_std_te = np.std(metric_te, axis=0)
+        axs[axs_idx].plot(xaxis, metric_mean_tr, label=metric_tr_name, marker='o', color='r', markersize=3)
+        axs[axs_idx].fill_between(xaxis, metric_mean_tr - metric_std_tr, metric_mean_tr + metric_std_tr, color='r', alpha=0.2)
+        minx = np.argmin(metric_mean_tr)
+        axs[axs_idx].axvline(xaxis[minx], color='r', linestyle='--', linewidth=1)
+        axs[axs_idx].plot(xaxis, metric_mean_te, label=metric_te_name, marker='o', color='b', markersize=3)
+        axs[axs_idx].fill_between(xaxis, metric_mean_te - metric_std_te, metric_mean_te + metric_std_te, color='b', alpha=0.2)
+        minx = np.argmin(metric_mean_te)
+        axs[axs_idx].axvline(xaxis[minx], color='b', linestyle='--', linewidth=1)
+        # axs[axs_idx].set_title(f'{metric_te_name} and {metric_tr_name}')
+        axs[axs_idx].legend(loc='lower right')
+        if axs_idx < len(indexes)//2 - 1:
+            axs[axs_idx].set_xticks([])
+        axs_idx += 1
+
+    # adjust the spacing between subplots
+    plt.tight_layout()
+
+    # save the plot
+    # plt.title(dataset)
+    # plt.show()
+    os.makedirs('./plots/likelihood/', exist_ok=True)
+    plt.savefig(f'./plots/likelihood/{dataset}-fig{suffix}.png')
+    plt.close()
+
 def generate_data(from_train=False):
     data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
     n_classes = data.n_classes
@@ -110,7 +160,7 @@ def generate_data(from_train=False):
         likelihood_value = []
         # for bandwidth in np.linspace(0.01, 0.2, 50):
-        for bandwidth in np.logspace(-5, np.log10(0.2), 50):
+        for bandwidth in np.logspace(-4, np.log10(0.2), 50):
             mix_densities = kde.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth)
             test_densities = [kde.pdf(kde_i, te_posteriors) for kde_i in mix_densities]
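The patched line raises the lower end of the candidate-bandwidth grid from 1e-5 to 1e-4; a quick standalone check of what the new grid spans (plain numpy, no QuaPy dependencies):

```python
import numpy as np

# 50 log-spaced bandwidth candidates between 1e-4 and 0.2
grid = np.logspace(-4, np.log10(0.2), 50)
print(grid[0], grid[-1], len(grid))  # approx. 0.0001 0.2 50
```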
@@ -172,16 +222,24 @@ for i, dataset in enumerate(tqdm(DATASETS, desc='processing datasets', total=len(
     measurement_names = []
     if show_ae:
         measurements.append(AE_error_te)
-        measurement_names.append('AE')
+        measurement_names.append('AE(te)')
+        measurements.append(AE_error_tr)
+        measurement_names.append('AE(tr)')
     if show_rae:
         measurements.append(RAE_error_te)
-        measurement_names.append('RAE')
+        measurement_names.append('RAE(te)')
+        measurements.append(RAE_error_tr)
+        measurement_names.append('RAE(tr)')
     if show_kld:
         measurements.append(KLD_error_te)
-        measurement_names.append('KLD')
+        measurement_names.append('KLD(te)')
+        measurements.append(KLD_error_tr)
+        measurement_names.append('KLD(tr)')
     if show_mse:
         measurements.append(MSE_error_te)
-        measurement_names.append('MSE')
+        measurement_names.append('MSE(te)')
+        measurements.append(MSE_error_tr)
+        measurement_names.append('MSE(tr)')
     measurements.append(normalize_metric(LIKE_value_te))
     measurements.append(normalize_metric(LIKE_value_tr))
     measurement_names.append('NLL(te)')
@@ -200,7 +258,8 @@ for i, dataset in enumerate(tqdm(DATASETS, desc='processing datasets', total=len(
     # measurements.append(normalize_metric(LIKE_value_tr))
     # measurement_names.append('NLL(te)')
     # measurement_names.append('NLL(tr)')

-    plot(xaxis, measurements, measurement_names, suffix='AVEtr')
+    # plot(xaxis, measurements, measurement_names, suffix='AVEtr')
+    plot_stack(xaxis, measurements, measurement_names, suffix='AVEtr')

LocalStack/experiments.py (new file, 126 lines)
View File

@@ -0,0 +1,126 @@
import os
from time import time

import numpy as np
from sklearn.linear_model import LogisticRegression

import quapy as qp
#from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred
from LocalStack.method import LocalStackingQuantification, LocalStackingQuantification2
from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from pathlib import Path

SEED = 1

METHODS = [
    ('PACC', PACC(), {}),
    ('EMQ', EMQ(), {}),
    ('KDEy-ML', KDEyML(), {}),
]

TRANSDUCTIVE_METHODS = [
    ('LSQ', LocalStackingQuantification(EMQ()), {}),
    ('LSQ2', LocalStackingQuantification2(EMQ()), {})
]

def show_results(result_path):
    import pandas as pd
    df = pd.read_csv(result_path + '.csv', sep='\t')
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', 1000)  # adjust the maximum display width
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE"], margins=True)
    print(pv)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["MRAE"], margins=True)
    print(pv)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["KLD"], margins=True)
    print(pv)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["TR-TIME"], margins=True)
    print(pv)
    pv = df.pivot_table(index='Dataset', columns="Method", values=["TE-TIME"], margins=True)
    print(pv)

if __name__ == '__main__':

    qp.environ['SAMPLE_SIZE'] = 500
    qp.environ['N_JOBS'] = -1
    n_bags_val = 25
    n_bags_test = 100
    result_dir = f'results_quantification/localstack'

    os.makedirs(result_dir, exist_ok=True)

    global_result_path = f'{result_dir}/allmethods'
    with open(global_result_path + '.csv', 'wt') as csv:
        csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\tTR-TIME\tTE-TIME\n')

    for method_name, quantifier, param_grid in METHODS + TRANSDUCTIVE_METHODS:

        print('Init method', method_name)

        with open(global_result_path + '.csv', 'at') as csv:

            for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:

                print('init', dataset)

                # run_experiment(global_result_path, method_name, quantifier, param_grid, dataset)
                local_result_path = os.path.join(Path(global_result_path).parent, method_name + '_' + dataset + '.dataframe')

                if os.path.exists(local_result_path):
                    print(f'result file {local_result_path} already exists; skipping')
                    report = qp.util.load_report(local_result_path)
                else:
                    with qp.util.temp_seed(SEED):
                        data = qp.datasets.fetch_UCIMulticlassDataset(dataset, verbose=True)
                        train, test = data.train_test

                        transductive_names = [name for (name, *_) in TRANSDUCTIVE_METHODS]

                        if method_name not in transductive_names:
                            if len(param_grid) == 0:
                                t_init = time()
                                quantifier.fit(train)
                                train_time = time() - t_init
                            else:
                                # model selection (train)
                                train, val = train.split_stratified(random_state=SEED)
                                protocol = UPP(val, repeats=n_bags_val)
                                modsel = GridSearchQ(
                                    quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error='mae'
                                )
                                t_init = time()
                                try:
                                    modsel.fit(train)
                                    print(f'best params {modsel.best_params_}')
                                    print(f'best score {modsel.best_score_}')
                                    quantifier = modsel.best_model()
                                except:
                                    print('something went wrong... trying to fit the default model')
                                    quantifier.fit(train)
                                train_time = time() - t_init
                        else:
                            # transductive
                            t_init = time()
                            quantifier.fit(train)  # <-- does nothing, actually (only projects the X into posteriors)
                            train_time = time() - t_init

                        # test
                        t_init = time()
                        protocol = UPP(test, repeats=n_bags_test)
                        report = qp.evaluation.evaluation_report(
                            quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
                        )
                        test_time = time() - t_init
                        report['tr_time'] = train_time
                        report['te_time'] = test_time
                        report.to_csv(local_result_path)

                means = report.mean(numeric_only=True)
                csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\t{means["tr_time"]:.3f}\t{means["te_time"]:.3f}\n')
                csv.flush()

    show_results(global_result_path)
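As a side note, the per-dataset reports written by this script can be re-loaded for inspection afterwards; a hedged sketch, where the file name follows the `local_result_path` pattern above and `dry-bean` is an assumed example entry of `UCI_MULTICLASS_DATASETS`:

```python
# Illustrative only: the path and dataset name are assumptions based on the script above.
import quapy as qp

report = qp.util.load_report('results_quantification/localstack/LSQ_dry-bean.dataframe')
print(report.mean(numeric_only=True))  # average mae/mrae/kld and timings over the test bags
```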

LocalStack/method.py (new file, 112 lines)
View File

@@ -0,0 +1,112 @@
import numpy as np
import quapy as qp
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeSoftQuantifier


class LocalStackingQuantification(BaseQuantifier):

    def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
        assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
            f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__name__}'
        self.surrogate_quantifier = surrogate_quantifier
        self.n_samples_gen = n_samples_gen
        self.n_samples_sel = n_samples_sel
        self.comparison_measure = qp.error.from_name(comparison_measure)
        self.random_state = random_state

    def fit(self, data: LabelledCollection):
        train, val = data.split_stratified()
        self.surrogate_quantifier.fit(train)
        self.val_data = val
        return self

    def normalize(self, out_simplex: np.ndarray):
        in_simplex = out_simplex / out_simplex.sum()
        return in_simplex

    def quantify(self, instances: np.ndarray):
        assert hasattr(self, 'val_data'), 'quantify called before fit'
        pred_prevs = self.surrogate_quantifier.quantify(instances)
        test_size = instances.shape[0]

        samples = []
        samples_pred_prevs = []
        samples_distance = []
        for i in range(self.n_samples_gen):
            sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
            pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
            err_dist = self.comparison_measure(pred_prevs, pred_prev_sample_i)

            samples.append(sample_i)
            samples_pred_prevs.append(pred_prev_sample_i)
            samples_distance.append(err_dist)

        ord_distances = np.argsort(samples_distance)
        samples_sel = np.asarray(samples)[ord_distances][:self.n_samples_sel]
        samples_pred_prevs_sel = np.asarray(samples_pred_prevs)[ord_distances][:self.n_samples_sel]

        reg = MultiOutputRegressor(SVR())
        reg_X = samples_pred_prevs_sel
        reg_y = [s.prevalence() for s in samples_sel]
        reg.fit(reg_X, reg_y)

        corrected_prev = reg.predict([pred_prevs])[0]
        corrected_prev = self.normalize(corrected_prev)
        return corrected_prev


class LocalStackingQuantification2(BaseQuantifier):
    """
    Instead of selecting training samples whose predicted prevalence resembles the prevalence predicted
    for the test set, this variant directly draws training samples at the prevalence predicted for the test set.
    """

    def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
        assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
            f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__name__}'
        self.surrogate_quantifier = surrogate_quantifier
        self.n_samples_gen = n_samples_gen
        self.n_samples_sel = n_samples_sel
        self.comparison_measure = qp.error.from_name(comparison_measure)
        self.random_state = random_state

    def fit(self, data: LabelledCollection):
        train, val = data.split_stratified()
        self.surrogate_quantifier.fit(train)
        self.val_data = val
        return self

    def normalize(self, out_simplex: np.ndarray):
        in_simplex = out_simplex / out_simplex.sum()
        return in_simplex

    def quantify(self, instances: np.ndarray):
        assert hasattr(self, 'val_data'), 'quantify called before fit'
        pred_prevs = self.surrogate_quantifier.quantify(instances)
        test_size = instances.shape[0]

        samples = []
        samples_pred_prevs = []
        for i in range(self.n_samples_gen):
            sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
            pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)

            samples.append(sample_i)
            samples_pred_prevs.append(pred_prev_sample_i)

        reg = MultiOutputRegressor(SVR())
        reg_X = samples_pred_prevs
        reg_y = [s.prevalence() for s in samples]
        reg.fit(reg_X, reg_y)

        corrected_prev = reg.predict([pred_prevs])[0]
        corrected_prev = self.normalize(corrected_prev)
        return corrected_prev
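A minimal usage sketch for the classes above, under the assumptions that this file is importable as `LocalStack.method` and that `dry-bean` is a valid UCI multiclass dataset name:

```python
# Hedged sketch: dataset name and sample-generation sizes are illustrative assumptions.
import quapy as qp
from quapy.method.aggregative import EMQ
from LocalStack.method import LocalStackingQuantification

data = qp.datasets.fetch_UCIMulticlassDataset('dry-bean')
train, test = data.train_test

# EMQ acts as the surrogate (aggregative, soft) quantifier
lsq = LocalStackingQuantification(EMQ(), n_samples_gen=200, n_samples_sel=50)
lsq.fit(train)                     # fits the surrogate and holds out validation data
estim_prev = lsq.quantify(test.X)  # regression-corrected prevalence estimate
print(estim_prev)
```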

View File

@@ -11,9 +11,14 @@ import sys
 from os.path import join

 quapy_path = join(pathlib.Path(__file__).parents[2].resolve().as_posix(), 'quapy')
 wiki_path = join(pathlib.Path(__file__).parents[0].resolve().as_posix(), 'wiki')
+source_path = pathlib.Path(__file__).parents[2].resolve().as_posix()
 print(f'quapy path={quapy_path}')
+print(f'quapy source path={source_path}')

 sys.path.insert(0, quapy_path)
 sys.path.insert(0, wiki_path)
+sys.path.insert(0, source_path)
+print(sys.path)

 project = 'QuaPy: A Python-based open-source framework for quantification'

View File

@@ -447,7 +447,7 @@ The [](quapy.method.composable) module allows the composition of quantification
 ```sh
 pip install --upgrade pip setuptools wheel
 pip install "jax[cpu]"
-pip install quapy[composable]
+pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
 ```
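As a quick orientation for what this installation enables, a hedged sketch of a composed quantifier; `LeastSquaresLoss` and `ClassTransformer` are qunfold components re-exported by the module, as assumed from its public API:

```python
# Hedged sketch: assumes qunfold is installed as shown above.
from sklearn.ensemble import RandomForestClassifier
from quapy.method.composable import (
    ComposableQuantifier,
    LeastSquaresLoss,
    ClassTransformer,
)

# compose a quantifier from a loss function and a feature transformation;
# the result behaves like any other QuaPy quantifier (fit/quantify)
acc = ComposableQuantifier(
    LeastSquaresLoss(),
    ClassTransformer(RandomForestClassifier(oob_score=True)),
)
```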
### Basics

View File

@@ -2,6 +2,13 @@
 This example illustrates the composition of quantification methods from
 arbitrary loss functions and feature transformations. It will extend the basic
 example on the usage of quapy with this composition.
+
+This example requires the installation of qunfold, the back-end of QuaPy's
+composition module:
+
+    pip install --upgrade pip setuptools wheel
+    pip install "jax[cpu]"
+    pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
 """

 import numpy as np

View File

@@ -1,5 +1,15 @@
 """This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
+
+_import_error_message = """qunfold, the back-end of quapy.method.composable, is not properly installed.
+
+To fix this error, call:
+
+    pip install --upgrade pip setuptools wheel
+    pip install "jax[cpu]"
+    pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
+"""
+
+try:
     import qunfold
     from qunfold.quapy import QuaPyWrapper
     from qunfold.sklearn import CVClassifier
@@ -40,6 +50,8 @@ __all__ = [ # control public members, e.g., for auto-documentation in sphinx; om
         "GaussianKernelTransformer",
         "GaussianRFFKernelTransformer",
     ]
+except ImportError as e:
+    raise ImportError(_import_error_message) from e

 def ComposableQuantifier(loss, transformer, **kwargs):
     """A generic quantification / unfolding method that solves a linear system of equations.
View File

@@ -125,7 +125,6 @@ setup(
     # projects.
     extras_require={  # Optional
         'bayes': ['jax', 'jaxlib', 'numpyro'],
-        'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4'],
         'neural': ['torch'],
         'tests': ['certifi'],
         'docs' : ['sphinx-rtd-theme', 'myst-parser'],