Merge branch 'localstack' of gitea-s2i2s.isti.cnr.it:moreo/QuaPy into localstack

This commit is contained in:
Alejandro Moreo Fernandez 2024-09-26 16:06:01 +02:00
commit 641228bf62
9 changed files with 317 additions and 75 deletions

View File

@ -20,15 +20,16 @@ jobs:
env:
QUAPY_TESTS_OMIT_LARGE_DATASETS: True
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
python -m pip install -e .[bayes,composable,tests]
python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
python -m pip install -e .[bayes,tests]
- name: Test with unittest
run: python -m unittest
@ -38,15 +39,18 @@ jobs:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/master'
steps:
- uses: actions/checkout@v1
- name: Build documentation
uses: ammaraskar/sphinx-action@master
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
pre-build-command: |
apt-get --allow-releaseinfo-change update -y && apt-get install -y git && git --version
python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
python -m pip install -e .[composable,neural,docs]
docs-folder: "docs/"
python-version: 3.11
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
python -m pip install -e .[neural,docs]
- name: Build documentation
run: sphinx-build -M html docs/source docs/build
- name: Publish documentation
run: |
git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/

View File

@ -1,23 +0,0 @@
name: Pylint
on: [push]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
- name: Analysing the code with pylint
run: |
pylint $(git ls-files '*.py')

126
LocalStack/experiments.py Normal file
View File

@ -0,0 +1,126 @@
import os
from time import time
import numpy as np
from sklearn.linear_model import LogisticRegression
import quapy as qp
from KDEy.kdey_devel import KDEyMLauto, KDEyMLauto2, KDEyMLred
from LocalStack.method import LocalStackingQuantification, LocalStackingQuantification2
from quapy.method.aggregative import PACC, EMQ, KDEyML
from quapy.model_selection import GridSearchQ
from quapy.protocol import UPP
from pathlib import Path
SEED = 1
METHODS = [
('PACC', PACC(), {}),
('EMQ', EMQ(), {}),
('KDEy-ML', KDEyML(), {}),
]
TRANSDUCTIVE_METHODS = [
('LSQ', LocalStackingQuantification(EMQ()), {}),
('LSQ2', LocalStackingQuantification2(EMQ()), {})
]
def show_results(result_path):
import pandas as pd
df = pd.read_csv(result_path + '.csv', sep='\t')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000) # Ajustar el ancho máximo
pv = df.pivot_table(index='Dataset', columns="Method", values=["MAE"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["MRAE"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["KLD"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["TR-TIME"], margins=True)
print(pv)
pv = df.pivot_table(index='Dataset', columns="Method", values=["TE-TIME"], margins=True)
print(pv)
if __name__ == '__main__':
qp.environ['SAMPLE_SIZE'] = 500
qp.environ['N_JOBS'] = -1
n_bags_val = 25
n_bags_test = 100
result_dir = f'results_quantification/localstack'
os.makedirs(result_dir, exist_ok=True)
global_result_path = f'{result_dir}/allmethods'
with open(global_result_path + '.csv', 'wt') as csv:
csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\tTR-TIME\tTE-TIME\n')
for method_name, quantifier, param_grid in METHODS + TRANSDUCTIVE_METHODS:
print('Init method', method_name)
with open(global_result_path + '.csv', 'at') as csv:
for dataset in qp.datasets.UCI_MULTICLASS_DATASETS:
print('init', dataset)
# run_experiment(global_result_path, method_name, quantifier, param_grid, dataset)
local_result_path = os.path.join(Path(global_result_path).parent, method_name + '_' + dataset + '.dataframe')
if os.path.exists(local_result_path):
print(f'result file {local_result_path} already exist; skipping')
report = qp.util.load_report(local_result_path)
else:
with qp.util.temp_seed(SEED):
data = qp.datasets.fetch_UCIMulticlassDataset(dataset, verbose=True)
train, test = data.train_test
transductive_names = [name for (name, *_) in TRANSDUCTIVE_METHODS]
if method_name not in transductive_names:
if len(param_grid) == 0:
t_init = time()
quantifier.fit(train)
train_time = time() - t_init
else:
# model selection (train)
train, val = train.split_stratified(random_state=SEED)
protocol = UPP(val, repeats=n_bags_val)
modsel = GridSearchQ(
quantifier, param_grid, protocol, refit=True, n_jobs=-1, verbose=1, error='mae'
)
t_init = time()
try:
modsel.fit(train)
print(f'best params {modsel.best_params_}')
print(f'best score {modsel.best_score_}')
quantifier = modsel.best_model()
except:
print('something went wrong... trying to fit the default model')
quantifier.fit(train)
train_time = time() - t_init
else:
# transductive
t_init = time()
quantifier.fit(train) # <-- nothing actually (proyects the X into posteriors only)
train_time = time() - t_init
# test
t_init = time()
protocol = UPP(test, repeats=n_bags_test)
report = qp.evaluation.evaluation_report(
quantifier, protocol, error_metrics=['mae', 'mrae', 'kld'], verbose=True
)
test_time = time() - t_init
report['tr_time'] = train_time
report['te_time'] = test_time
report.to_csv(local_result_path)
means = report.mean(numeric_only=True)
csv.write(f'{method_name}\t{dataset}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\t{means["tr_time"]:.3f}\t{means["te_time"]:.3f}\n')
csv.flush()
show_results(global_result_path)

112
LocalStack/method.py Normal file
View File

@ -0,0 +1,112 @@
import numpy as np
import quapy as qp
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import SVR
from data import LabelledCollection
from quapy.method.base import BaseQuantifier
from quapy.method.aggregative import AggregativeSoftQuantifier
class LocalStackingQuantification(BaseQuantifier):
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection):
train, val = data.split_stratified()
self.surrogate_quantifier.fit(train)
self.val_data = val
return self
def normalize(self, out_simplex:np.ndarray):
in_simplex = out_simplex/out_simplex.sum()
return in_simplex
def quantify(self, instances: np.ndarray):
assert hasattr(self, 'val_data'), 'quantify called before fit'
pred_prevs = self.surrogate_quantifier.quantify(instances)
test_size = instances.shape[0]
samples = []
samples_pred_prevs = []
samples_distance = []
for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
err_dist = self.comparison_measure(pred_prevs, pred_prev_sample_i)
samples.append(sample_i)
samples_pred_prevs.append(pred_prev_sample_i)
samples_distance.append(err_dist)
ord_distances = np.argsort(samples_distance)
samples_sel = np.asarray(samples)[ord_distances][:self.n_samples_sel]
samples_pred_prevs_sel = np.asarray(samples_pred_prevs)[ord_distances][:self.n_samples_sel]
reg = MultiOutputRegressor(SVR())
reg_X = samples_pred_prevs_sel
reg_y = [s.prevalence() for s in samples_sel]
reg.fit(reg_X, reg_y)
corrected_prev = reg.predict([pred_prevs])[0]
corrected_prev = self.normalize(corrected_prev)
return corrected_prev
class LocalStackingQuantification2(BaseQuantifier):
"""
Este en vez de seleccionar samples de training para los que la prevalencia predicha se parece a la prevalencia
predica en test, saca directamente samples de training con la prevalencia predicha en test
"""
def __init__(self, surrogate_quantifier, n_samples_gen=200, n_samples_sel=50, comparison_measure='ae', random_state=None):
assert isinstance(surrogate_quantifier, AggregativeSoftQuantifier), \
f'the surrogate quantifier must be of type {AggregativeSoftQuantifier.__class__.__name__}'
self.surrogate_quantifier = surrogate_quantifier
self.n_samples_gen = n_samples_gen
self.n_samples_sel = n_samples_sel
self.comparison_measure = qp.error.from_name(comparison_measure)
self.random_state = random_state
def fit(self, data: LabelledCollection):
train, val = data.split_stratified()
self.surrogate_quantifier.fit(train)
self.val_data = val
return self
def normalize(self, out_simplex:np.ndarray):
in_simplex = out_simplex/out_simplex.sum()
return in_simplex
def quantify(self, instances: np.ndarray):
assert hasattr(self, 'val_data'), 'quantify called before fit'
pred_prevs = self.surrogate_quantifier.quantify(instances)
test_size = instances.shape[0]
samples = []
samples_pred_prevs = []
for i in range(self.n_samples_gen):
sample_i = self.val_data.sampling(test_size, *pred_prevs, random_state=self.random_state)
pred_prev_sample_i = self.surrogate_quantifier.quantify(sample_i.X)
samples.append(sample_i)
samples_pred_prevs.append(pred_prev_sample_i)
reg = MultiOutputRegressor(SVR())
reg_X = samples_pred_prevs
reg_y = [s.prevalence() for s in samples]
reg.fit(reg_X, reg_y)
corrected_prev = reg.predict([pred_prevs])[0]
corrected_prev = self.normalize(corrected_prev)
return corrected_prev

View File

@ -11,9 +11,14 @@ import sys
from os.path import join
quapy_path = join(pathlib.Path(__file__).parents[2].resolve().as_posix(), 'quapy')
wiki_path = join(pathlib.Path(__file__).parents[0].resolve().as_posix(), 'wiki')
source_path = pathlib.Path(__file__).parents[2].resolve().as_posix()
print(f'quapy path={quapy_path}')
print(f'quapy source path={source_path}')
sys.path.insert(0, quapy_path)
sys.path.insert(0, wiki_path)
sys.path.insert(0, source_path)
print(sys.path)
project = 'QuaPy: A Python-based open-source framework for quantification'

View File

@ -447,7 +447,7 @@ The [](quapy.method.composable) module allows the composition of quantification
```sh
pip install --upgrade pip setuptools wheel
pip install "jax[cpu]"
pip install quapy[composable]
pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
```
### Basics

View File

@ -2,6 +2,13 @@
This example illustrates the composition of quantification methods from
arbitrary loss functions and feature transformations. It will extend the basic
example on the usage of quapy with this composition.
This example requires the installation of qunfold, the back-end of QuaPy's
composition module:
pip install --upgrade pip setuptools wheel
pip install "jax[cpu]"
pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
"""
import numpy as np

View File

@ -1,45 +1,57 @@
"""This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
import qunfold
from qunfold.quapy import QuaPyWrapper
from qunfold.sklearn import CVClassifier
from qunfold import (
LeastSquaresLoss, # losses
BlobelLoss,
EnergyLoss,
HellingerSurrogateLoss,
CombinedLoss,
TikhonovRegularization,
TikhonovRegularized,
ClassTransformer, # transformers
HistogramTransformer,
DistanceTransformer,
KernelTransformer,
EnergyKernelTransformer,
LaplacianKernelTransformer,
GaussianKernelTransformer,
GaussianRFFKernelTransformer,
)
_import_error_message = """qunfold, the back-end of quapy.method.composable, is not properly installed.
__all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
"ComposableQuantifier",
"CVClassifier",
"LeastSquaresLoss",
"BlobelLoss",
"EnergyLoss",
"HellingerSurrogateLoss",
"CombinedLoss",
"TikhonovRegularization",
"TikhonovRegularized",
"ClassTransformer",
"HistogramTransformer",
"DistanceTransformer",
"KernelTransformer",
"EnergyKernelTransformer",
"LaplacianKernelTransformer",
"GaussianKernelTransformer",
"GaussianRFFKernelTransformer",
]
To fix this error, call:
pip install --upgrade pip setuptools wheel
pip install "jax[cpu]"
pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4"
"""
try:
import qunfold
from qunfold.quapy import QuaPyWrapper
from qunfold.sklearn import CVClassifier
from qunfold import (
LeastSquaresLoss, # losses
BlobelLoss,
EnergyLoss,
HellingerSurrogateLoss,
CombinedLoss,
TikhonovRegularization,
TikhonovRegularized,
ClassTransformer, # transformers
HistogramTransformer,
DistanceTransformer,
KernelTransformer,
EnergyKernelTransformer,
LaplacianKernelTransformer,
GaussianKernelTransformer,
GaussianRFFKernelTransformer,
)
__all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
"ComposableQuantifier",
"CVClassifier",
"LeastSquaresLoss",
"BlobelLoss",
"EnergyLoss",
"HellingerSurrogateLoss",
"CombinedLoss",
"TikhonovRegularization",
"TikhonovRegularized",
"ClassTransformer",
"HistogramTransformer",
"DistanceTransformer",
"KernelTransformer",
"EnergyKernelTransformer",
"LaplacianKernelTransformer",
"GaussianKernelTransformer",
"GaussianRFFKernelTransformer",
]
except ImportError as e:
raise ImportError(_import_error_message) from e
def ComposableQuantifier(loss, transformer, **kwargs):
"""A generic quantification / unfolding method that solves a linear system of equations.

View File

@ -125,7 +125,6 @@ setup(
# projects.
extras_require={ # Optional
'bayes': ['jax', 'jaxlib', 'numpyro'],
'composable': ['qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.4'],
'neural': ['torch'],
'tests': ['certifi'],
'docs' : ['sphinx-rtd-theme', 'myst-parser'],