Compare commits

No commits in common. "9aecdad66f156cf4134dc0a055e97f859a5216f5" and "5bcaa8d1bb7b0704cc2aecdbc196a68b3be059f2" have entirely different histories.

12 changed files with 85 additions and 386 deletions

View File

@@ -20,16 +20,15 @@ jobs:
     env:
       QUAPY_TESTS_OMIT_LARGE_DATASETS: True
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
-          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
-          python -m pip install -e .[bayes,tests]
+          python -m pip install -e .[bayes,composable,tests]
      - name: Test with unittest
        run: python -m unittest
@@ -39,18 +38,15 @@ jobs:
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/master'
    steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: 3.11
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
-          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
-          python -m pip install -e .[neural,docs]
+      - uses: actions/checkout@v1
      - name: Build documentation
-        run: sphinx-build -M html docs/source docs/build
+        uses: ammaraskar/sphinx-action@master
+        with:
+          pre-build-command: |
+            apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version
+            python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
+            python -m pip install -e .[composable,neural,docs]
+          docs-folder: "docs/"
      - name: Publish documentation
        run: |
          git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/

.github/workflows/pylint.yml (new file, +23 lines)
View File

@@ -0,0 +1,23 @@
+name: Pylint
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8", "3.9", "3.10"]
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pylint
+    - name: Analysing the code with pylint
+      run: |
+        pylint $(git ls-files '*.py')

View File

@@ -326,7 +326,7 @@ class KDEyMLauto2(KDEyML):
            if self.target == 'likelihood':
                loss_fn = neg_loglikelihood_prev
            else:
-                loss_fn = lambda prev_hat: qp.error.from_name(self.target)(prevtrue, prev_hat)
+                loss_fn = lambda prev_hat: qp.error.from_name(self.target)(prev, prev_hat)
            pred_prev, neglikelihood = optim_minimize(loss_fn, init_prev, return_loss=True)
            loss_accum += neglikelihood
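The fix above replaces the undefined name `prevtrue` with `prev`: `qp.error.from_name` resolves `self.target` (e.g., 'mae') to QuaPy's error function, and the lambda pins its first argument to the reference prevalence. A minimal standalone sketch (values are illustrative):

```python
import numpy as np
import quapy as qp

# resolve a quantification error metric by name, as the patched line does
mae = qp.error.from_name('mae')

prev = np.asarray([0.2, 0.3, 0.5])              # reference prevalence
loss_fn = lambda prev_hat: mae(prev, prev_hat)  # loss as a function of the estimate

print(loss_fn(np.asarray([0.25, 0.25, 0.5])))   # ~0.033
```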

View File

@@ -43,7 +43,7 @@ METHODS = [
    ('KDEy-AE', KDEyMLauto2(newLR(), bandwidth='auto', target='mae', search='grid'), wrap_hyper(logreg_grid)),
    ('KDEy-AE+', KDEyMLauto2(newLR(), bandwidth='auto', target='mae', search='optim'), wrap_hyper(logreg_grid)),
    ('KDEy-RAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='grid'), wrap_hyper(logreg_grid)),
-    ('KDEy-RAE+', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='optim'), wrap_hyper(logreg_grid)),
+    ('KDEy-RAE', KDEyMLauto2(newLR(), bandwidth='auto', target='mrae', search='optim'), wrap_hyper(logreg_grid)),
]

@@ -167,4 +167,4 @@ if __name__ == '__main__':
    for method_name, quantifier, param_grid in METHODS + TRANSDUCTIVE_METHODS:
        run_experiment(method_name, quantifier, param_grid)

-    show_results(global_result_path)
\ No newline at end of file
+    show_results(global_result_path)

View File

@@ -28,7 +28,7 @@ def plot(xaxis, metrics_measurements, metrics_names, suffix):
    fig, ax1 = plt.subplots(figsize=(8, 6))

    def add_plot(ax, mean_error, std_error, name, color, marker):
-        ax.plot(xaxis, mean_error, label=name, marker=marker, color=color, markersize=3)
+        ax.plot(xaxis, mean_error, label=name, marker=marker, color=color)
        if std_error is not None:
            ax.fill_between(xaxis, mean_error - std_error, mean_error + std_error, color=color, alpha=0.2)

@@ -74,56 +74,6 @@ def plot(xaxis, metrics_measurements, metrics_names, suffix):
    plt.close()

-def plot_stack(xaxis, metrics_measurements, metrics_names, suffix):
-    # create the figure and the axes (4 vertical blocks)
-    fig, axs = plt.subplots(4, 1, figsize=(8, 12))
-    x = xaxis
-    indexes = np.arange(len(metrics_measurements))
-    axs_idx = 0
-    # colors = ['b', 'g', 'r', 'c', 'purple']
-    for m_te, m_tr in zip(indexes[:-1:2], indexes[1::2]):
-        metric_te, metric_tr = metrics_measurements[m_te], metrics_measurements[m_tr]
-        metric_te_name, metric_tr_name = metrics_names[m_te], metrics_names[m_tr]
-        metric_mean_tr = np.mean(metric_tr, axis=0)
-        metric_std_tr = np.std(metric_tr, axis=0)
-        metric_mean_te = np.mean(metric_te, axis=0)
-        metric_std_te = np.std(metric_te, axis=0)
-        axs[axs_idx].plot(xaxis, metric_mean_tr, label=metric_tr_name, marker='o', color='r', markersize=3)
-        axs[axs_idx].fill_between(xaxis, metric_mean_tr - metric_std_tr, metric_mean_tr + metric_std_tr, color='r', alpha=0.2)
-        minx = np.argmin(metric_mean_tr)
-        axs[axs_idx].axvline(xaxis[minx], color='r', linestyle='--', linewidth=1)
-        axs[axs_idx].plot(xaxis, metric_mean_te, label=metric_te_name, marker='o', color='b', markersize=3)
-        axs[axs_idx].fill_between(xaxis, metric_mean_te - metric_std_te, metric_mean_te + metric_std_te, color='b', alpha=0.2)
-        minx = np.argmin(metric_mean_te)
-        axs[axs_idx].axvline(xaxis[minx], color='b', linestyle='--', linewidth=1)
-        # axs[axs_idx].set_title(f'{metric_te_name} and {metric_tr_name}')
-        axs[axs_idx].legend(loc='lower right')
-        if axs_idx < len(indexes)//2 - 1:
-            axs[axs_idx].set_xticks([])
-        axs_idx += 1
-
-    # adjust the spacing between subplots
-    plt.tight_layout()
-    # show the plot
-    # plt.title(dataset)
-    # plt.show()
-    os.makedirs('./plots/likelihood/', exist_ok=True)
-    plt.savefig(f'./plots/likelihood/{dataset}-fig{suffix}.png')
-    plt.close()
-
 def generate_data(from_train=False):
    data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
    n_classes = data.n_classes
@@ -160,7 +110,7 @@ def generate_data(from_train=False):
    likelihood_value = []

    # for bandwidth in np.linspace(0.01, 0.2, 50):
-    for bandwidth in np.logspace(-4, np.log10(0.2), 50):
+    for bandwidth in np.logspace(-5, np.log10(0.2), 50):
        mix_densities = kde.get_mixture_components(tr_posteriors, tr_y, classes, bandwidth)
        test_densities = [kde.pdf(kde_i, te_posteriors) for kde_i in mix_densities]
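This one-line change lowers the bandwidth grid's lower bound from 1e-4 to 1e-5. A standalone sketch of what the new grid contains (not part of the patched script):

```python
import numpy as np

# 50 log-spaced bandwidth candidates from 1e-5 up to 0.2, as in the patched loop
bandwidths = np.logspace(-5, np.log10(0.2), 50)
print(bandwidths[:3])  # ~[1.00e-05, 1.22e-05, 1.50e-05]
print(bandwidths[-1])  # 0.2
```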
@@ -222,24 +172,16 @@ for i, dataset in enumerate(tqdm(DATASETS, desc='processing datasets', total=len(
    measurement_names = []
    if show_ae:
        measurements.append(AE_error_te)
-        measurement_names.append('AE(te)')
-        measurements.append(AE_error_tr)
-        measurement_names.append('AE(tr)')
+        measurement_names.append('AE')
    if show_rae:
        measurements.append(RAE_error_te)
-        measurement_names.append('RAE(te)')
-        measurements.append(RAE_error_tr)
-        measurement_names.append('RAE(tr)')
+        measurement_names.append('RAE')
    if show_kld:
        measurements.append(KLD_error_te)
-        measurement_names.append('KLD(te)')
-        measurements.append(KLD_error_tr)
-        measurement_names.append('KLD(tr)')
+        measurement_names.append('KLD')
    if show_mse:
        measurements.append(MSE_error_te)
-        measurement_names.append('MSE(te)')
-        measurements.append(MSE_error_tr)
-        measurement_names.append('MSE(tr)')
+        measurement_names.append('MSE')
    measurements.append(normalize_metric(LIKE_value_te))
    measurements.append(normalize_metric(LIKE_value_tr))
    measurement_names.append('NLL(te)')

@@ -258,8 +200,7 @@ for i, dataset in enumerate(tqdm(DATASETS, desc='processing datasets', total=len(
    # measurements.append(normalize_metric(LIKE_value_tr))
    # measurement_names.append('NLL(te)')
    # measurement_names.append('NLL(tr)')
-    # plot(xaxis, measurements, measurement_names, suffix='AVEtr')
-    plot_stack(xaxis, measurements, measurement_names, suffix='AVEtr')
+    plot(xaxis, measurements, measurement_names, suffix='AVEtr')

View File

@ -1,126 +0,0 @@
import os
from time import time
import numpy as np
from sklearn.linear_model import LogisticRegression
import quapy as qp
#from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred
from LocalStack.method import LocalStackingQuantification, LocalStackingQuantification2
from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from pathlib import Path
SEED = 1
METHODS = [
('PACC', PACC(), {}),
('EMQ', EMQ(), {}),
('KDEy-ML', KDEyML(), {}),
]
TRANSDUCTIVE_METHODS = [
('LSQ', LocalStackingQuantification(EMQ()), {}),
('LSQ2', LocalStackingQuantification2(EMQ()), {})
]
def show_results(result_path):
import pandas as pd
df = pd.read_csv(result_path + '.csv', sep='\t')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000) # Ajustar el ancho máximo
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MRAE"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["KLD"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["TR-TIME"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["TE-TIME"], margins=True)
print(pv)
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 500
qp.environ['N_JOBS'] = -1
n_bags_val = 25
n_bags_test = 100
result_dir = f'results_quantification/localstack'
os.makedirs(result_dir, exist_ok=True)
global_result_path = f'{result_dir}/allmethods'
with open(global_result_path + '.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\tTR-TIME\tTE-TIME\n')
for method_name, quantifier, param_grid in METHODS + TRANSDUCTIVE_METHODS:
print('Init method', method_name)
with open(global_result_path + '.csv', 'at') as csv:
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
print('init', dataset)
# run_experiment(global_result_path, method_name, quantifier, param_grid, dataset)
local_result_path = os.path.join(Path(global_result_path).parent, method_name + '_' + dataset + '.dataframe')
if os.path.exists(local_result_path):
print(f'result file {local_result_path} already exist; skipping')
report = qp.util.load_report(local_result_path)
else:
with qp.util.temp_seed(SEED):
data = qp.datasets.fetch_UCIMulticlassDataset(dataset, verbose=True)
train, test = data.train_test
transductive_names = [name for (name, *_) in TRANSDUCTIVE_METHODS]
if method_name not in transductive_names:
if len(param_grid) == 0:
t_init = time()
quantifier.fit(train)
train_time = time() - t_init
else:
# model selection (train)
train, val = train.split_stratified(random_state=SEED)
protocol = UPP(val, repeats=n_bags_val)
modsel = GridSearchQ(
quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error='mae'
)
t_init = time()
try:
modsel.fit(train)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
quantifier = modsel.best_model()
except:
print('something went wrong... trying to fit the default model')
quantifier.fit(train)
train_time = time() - t_init
else:
# transductive
t_init = time()
quantifier.fit(train) # <-- nothing actually (proyects the X into posteriors only)
train_time = time() - t_init
# test
t_init = time()
protocol = UPP(test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(
quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
)
test_time = time() - t_init
report['tr_time'] = train_time
report['te_time'] = test_time
report.to_csv(local_result_path)
means = report.mean(numeric_only=True)
csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\t{means["tr_time"]:.3f}\t{means["te_time"]:.3f}\n')
csv.flush()
show_results(global_result_path)
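Since the removed script cached one report per method/dataset pair, individual results can still be inspected after the fact; a minimal sketch (the file name here is hypothetical):

```python
import quapy as qp

# hypothetical cached report written by the script above
path = 'results_quantification/localstack/LSQ_dry-bean.dataframe'
report = qp.util.load_report(path)

# per-sample errors averaged into a single row, as used for the global CSV
print(report.mean(numeric_only=True))  # mae, mrae, kld, tr_time, te_time
```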

View File

@ -1,112 +0,0 @@
import numpy as np
import quapy as qp
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeSoftQuantifier
class LocalStackingQuantification(BaseQuantifier):
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection):
train, val = data.split_stratified()
self.surrogate_quantifier.fit(train)
self.val_data = val
return self
def normalize(self, out_simplex:np.ndarray):
in_simplex = out_simplex/out_simplex.sum()
return in_simplex
def quantify(self, instances: np.ndarray):
assert hasattr(self, 'val_data'), 'quantify called before fit'
pred_prevs = self.surrogate_quantifier.quantify(instances)
test_size = instances.shape[0]
samples = []
samples_pred_prevs = []
samples_distance = []
for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
err_dist = self.comparison_measure(pred_prevs, pred_prev_sample_i)
samples.append(sample_i)
samples_pred_prevs.append(pred_prev_sample_i)
samples_distance.append(err_dist)
ord_distances = np.argsort(samples_distance)
samples_sel = np.asarray(samples)[ord_distances][:self.n_samples_sel]
samples_pred_prevs_sel = np.asarray(samples_pred_prevs)[ord_distances][:self.n_samples_sel]
reg = MultiOutputRegressor(SVR())
reg_X = samples_pred_prevs_sel
reg_y = [s.prevalence() for s in samples_sel]
reg.fit(reg_X, reg_y)
corrected_prev = reg.predict([pred_prevs])[0]
corrected_prev = self.normalize(corrected_prev)
return corrected_prev
class LocalStackingQuantification2(BaseQuantifier):
"""
Este en vez de seleccionar samples de training para los que la prevalencia predicha se parece a la prevalencia
predica en test, saca directamente samples de training con la prevalencia predicha en test
"""
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection):
train, val = data.split_stratified()
self.surrogate_quantifier.fit(train)
self.val_data = val
return self
def normalize(self, out_simplex:np.ndarray):
in_simplex = out_simplex/out_simplex.sum()
return in_simplex
def quantify(self, instances: np.ndarray):
assert hasattr(self, 'val_data'), 'quantify called before fit'
pred_prevs = self.surrogate_quantifier.quantify(instances)
test_size = instances.shape[0]
samples = []
samples_pred_prevs = []
for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
samples.append(sample_i)
samples_pred_prevs.append(pred_prev_sample_i)
reg = MultiOutputRegressor(SVR())
reg_X = samples_pred_prevs
reg_y = [s.prevalence() for s in samples]
reg.fit(reg_X, reg_y)
corrected_prev = reg.predict([pred_prevs])[0]
corrected_prev = self.normalize(corrected_prev)
return corrected_prev
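For orientation, this is roughly how the removed classes were driven (a hypothetical usage sketch; the dataset choice is illustrative): the surrogate quantifier predicts a test prevalence, validation samples are drawn around it, and a regressor corrects the estimate.

```python
import quapy as qp
from quapy.method.aggregative import EMQ
from LocalStack.method import LocalStackingQuantification  # the removed module

# illustrative dataset; any UCI multiclass dataset would do
data = qp.datasets.fetch_UCIMulticlassDataset('dry-bean')
train, test = data.train_test

lsq = LocalStackingQuantification(EMQ(), n_samples_gen=200, n_samples_sel=50)
lsq.fit(train)               # fits the surrogate, keeps a validation split
print(lsq.quantify(test.X))  # regression-corrected prevalence estimate
```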

View File

@@ -11,14 +11,9 @@ import sys
 from os.path import join

 quapy_path = join(pathlib.Path(__file__).parents[2].resolve().as_posix(), 'quapy')
-wiki_path = join(pathlib.Path(__file__).parents[0].resolve().as_posix(), 'wiki')
-source_path = pathlib.Path(__file__).parents[2].resolve().as_posix()
 print(f'quapy path={quapy_path}')
-print(f'quapy source path={source_path}')
 sys.path.insert(0, quapy_path)
-sys.path.insert(0, wiki_path)
-sys.path.insert(0, source_path)
 print(sys.path)

 project = 'QuaPy: A Python-based open-source framework for quantification'

View File

@@ -447,7 +447,7 @@ The [](quapy.method.composable) module allows the composition of quantification
 ```sh
 pip install --upgrade pip setuptools wheel
 pip install "jax[cpu]"
-pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
+pip install quapy[composable]
 ```
### Basics

View File

@@ -2,13 +2,6 @@
 This example illustrates the composition of quantification methods from
 arbitrary loss functions and feature transformations. It will extend the basic
 example on the usage of quapy with this composition.
-
-This example requires the installation of qunfold, the back-end of QuaPy's
-composition module:
-
-    pip install --upgrade pip setuptools wheel
-    pip install "jax[cpu]"
-    pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
 """
 import numpy as np
import numpy as np

View File

@@ -1,57 +1,45 @@
 """This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
-_import_error_message = """qunfold, the back-end of quapy.method.composable, is not properly installed.
+import qunfold
+from qunfold.quapy import QuaPyWrapper
+from qunfold.sklearn import CVClassifier
+from qunfold import (
+    LeastSquaresLoss, # losses
+    BlobelLoss,
+    EnergyLoss,
+    HellingerSurrogateLoss,
+    CombinedLoss,
+    TikhonovRegularization,
+    TikhonovRegularized,
+    ClassTransformer, # transformers
+    HistogramTransformer,
+    DistanceTransformer,
+    KernelTransformer,
+    EnergyKernelTransformer,
+    LaplacianKernelTransformer,
+    GaussianKernelTransformer,
+    GaussianRFFKernelTransformer,
+)
-
-To fix this error, call:
-
-pip install --upgrade pip setuptools wheel
-pip install "jax[cpu]"
-pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
-"""
-
-try:
-    import qunfold
-    from qunfold.quapy import QuaPyWrapper
-    from qunfold.sklearn import CVClassifier
-    from qunfold import (
-        LeastSquaresLoss, # losses
-        BlobelLoss,
-        EnergyLoss,
-        HellingerSurrogateLoss,
-        CombinedLoss,
-        TikhonovRegularization,
-        TikhonovRegularized,
-        ClassTransformer, # transformers
-        HistogramTransformer,
-        DistanceTransformer,
-        KernelTransformer,
-        EnergyKernelTransformer,
-        LaplacianKernelTransformer,
-        GaussianKernelTransformer,
-        GaussianRFFKernelTransformer,
-    )
-
-    __all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
-        "ComposableQuantifier",
-        "CVClassifier",
-        "LeastSquaresLoss",
-        "BlobelLoss",
-        "EnergyLoss",
-        "HellingerSurrogateLoss",
-        "CombinedLoss",
-        "TikhonovRegularization",
-        "TikhonovRegularized",
-        "ClassTransformer",
-        "HistogramTransformer",
-        "DistanceTransformer",
-        "KernelTransformer",
-        "EnergyKernelTransformer",
-        "LaplacianKernelTransformer",
-        "GaussianKernelTransformer",
-        "GaussianRFFKernelTransformer",
-    ]
-except ImportError as e:
-    raise ImportError(_import_error_message) from e
+
+__all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
+    "ComposableQuantifier",
+    "CVClassifier",
+    "LeastSquaresLoss",
+    "BlobelLoss",
+    "EnergyLoss",
+    "HellingerSurrogateLoss",
+    "CombinedLoss",
+    "TikhonovRegularization",
+    "TikhonovRegularized",
+    "ClassTransformer",
+    "HistogramTransformer",
+    "DistanceTransformer",
+    "KernelTransformer",
+    "EnergyKernelTransformer",
+    "LaplacianKernelTransformer",
+    "GaussianKernelTransformer",
+    "GaussianRFFKernelTransformer",
+]

 def ComposableQuantifier(loss, transformer, **kwargs):
     """A generic quantification / unfolding method that solves a linear system of equations.

View File

@@ -125,6 +125,7 @@ setup(
     #   projects.
     extras_require={  # Optional
         'bayes': ['jax', 'jaxlib', 'numpyro'],
+        'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4'],
         'neural': ['torch'],
         'tests': ['certifi'],
         'docs' : ['sphinx-rtd-theme', 'myst-parser'],
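The new 'composable' extra turns the qunfold pin into an opt-in dependency instead of a manual install step. A quick way to check whether the extra is present in an environment (a sketch, mirroring the guarded import that the old composable.py performed):

```python
# sketch: check whether the optional back-end declared by the new
# 'composable' extra is importable in the current environment
try:
    import qunfold  # pulled in by: pip install quapy[composable]
    print('composable back-end available')
except ImportError:
    print('missing; install with: pip install quapy[composable]')
```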