Compare commits
2 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
bb0950fad5 | |
|
|
2e992a0b9a |
|
|
@ -1,108 +0,0 @@
|
||||||
name: CI
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- devel
|
|
||||||
tags:
|
|
||||||
- "[0-9]+.[0-9]+.[0-9]+"
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
|
|
||||||
# take out unit tests
|
|
||||||
test:
|
|
||||||
name: Unit tests (Python ${{ matrix.python-version }})
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version:
|
|
||||||
- "3.11"
|
|
||||||
env:
|
|
||||||
QUAPY_TESTS_OMIT_LARGE_DATASETS: True
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip setuptools wheel
|
|
||||||
python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@main"
|
|
||||||
python -m pip install -e .[bayes,tests]
|
|
||||||
- name: Test with unittest
|
|
||||||
run: python -m unittest
|
|
||||||
|
|
||||||
# build and push documentation to gh-pages (only if pushed to the master branch)
|
|
||||||
docs:
|
|
||||||
name: Documentation
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
if: github.ref == 'refs/heads/master'
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: 3.11
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
|
|
||||||
python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@main"
|
|
||||||
python -m pip install -e .[neural,docs]
|
|
||||||
- name: Build documentation
|
|
||||||
run: sphinx-build -M html docs/source docs/build
|
|
||||||
- name: Publish documentation
|
|
||||||
run: |
|
|
||||||
git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/
|
|
||||||
cp -r docs/build/html/* __gh-pages/
|
|
||||||
cd __gh-pages/
|
|
||||||
git config --local user.email "action@github.com"
|
|
||||||
git config --local user.name "GitHub Action"
|
|
||||||
git add .
|
|
||||||
git commit -am "Documentation based on ${{ github.sha }}" || true
|
|
||||||
- name: Push changes
|
|
||||||
uses: ad-m/github-push-action@master
|
|
||||||
with:
|
|
||||||
branch: gh-pages
|
|
||||||
directory: __gh-pages/
|
|
||||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
release:
|
|
||||||
name: Build & Publish Release
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
- name: Set up Python
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: "3.11"
|
|
||||||
- name: Install build dependencies
|
|
||||||
run: |
|
|
||||||
python -m pip install --upgrade pip build twine
|
|
||||||
- name: Build package
|
|
||||||
run: python -m build
|
|
||||||
- name: Publish to TestPyPI
|
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
|
||||||
with:
|
|
||||||
user: __token__
|
|
||||||
# use these for TESTs!
|
|
||||||
# password: ${{ secrets.TEST_PYPI_API_TOKEN }}
|
|
||||||
# repository_url: https://test.pypi.org/legacy/
|
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
||||||
repository_url: https://upload.pypi.org/legacy/
|
|
||||||
- name: Create GitHub Release
|
|
||||||
id: create_release
|
|
||||||
uses: actions/create-release@v1
|
|
||||||
with:
|
|
||||||
tag_name: ${{ github.ref_name }}
|
|
||||||
release_name: Release ${{ github.ref_name }}
|
|
||||||
body: |
|
|
||||||
Changes in this release:
|
|
||||||
- see commit history for details
|
|
||||||
draft: false
|
|
||||||
prerelease: false
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
name: Pylint
|
||||||
|
|
||||||
|
on: [push]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.8", "3.9", "3.10"]
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v3
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pylint
|
||||||
|
- name: Analysing the code with pylint
|
||||||
|
run: |
|
||||||
|
pylint $(git ls-files '*.py')
|
||||||
|
|
@ -69,12 +69,8 @@ instance/
|
||||||
# Scrapy stuff:
|
# Scrapy stuff:
|
||||||
.scrapy
|
.scrapy
|
||||||
|
|
||||||
# vscode config:
|
|
||||||
.vscode/
|
|
||||||
|
|
||||||
# Sphinx documentation
|
# Sphinx documentation
|
||||||
docs/_build/doctest
|
docs/_build/
|
||||||
docs/_build/doctrees
|
|
||||||
|
|
||||||
# PyBuilder
|
# PyBuilder
|
||||||
target/
|
target/
|
||||||
|
|
@ -89,11 +85,6 @@ ipython_config.py
|
||||||
# pyenv
|
# pyenv
|
||||||
.python-version
|
.python-version
|
||||||
|
|
||||||
# poetry
|
|
||||||
poetry.toml
|
|
||||||
pyproject.toml
|
|
||||||
poetry.lock
|
|
||||||
|
|
||||||
# pipenv
|
# pipenv
|
||||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
|
@ -139,33 +130,3 @@ dmypy.json
|
||||||
.pyre/
|
.pyre/
|
||||||
|
|
||||||
*__pycache__*
|
*__pycache__*
|
||||||
*.pdf
|
|
||||||
*.zip
|
|
||||||
*.png
|
|
||||||
*.csv
|
|
||||||
*.pkl
|
|
||||||
*.dataframe
|
|
||||||
|
|
||||||
|
|
||||||
# other projects
|
|
||||||
LeQua2022
|
|
||||||
MultiLabel
|
|
||||||
NewMethods
|
|
||||||
Ordinal
|
|
||||||
Retrieval
|
|
||||||
eDiscovery
|
|
||||||
poster-cikm
|
|
||||||
slides-cikm
|
|
||||||
slides-short-cikm
|
|
||||||
quick_experiment
|
|
||||||
svm_perf_quantification/svm_struct
|
|
||||||
svm_perf_quantification/svm_light
|
|
||||||
TweetSentQuant
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
*.png
|
|
||||||
.idea
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
import seaborn as sns
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
palette = itertools.cycle(sns.color_palette())
|
||||||
|
|
||||||
|
def setframe(ax=None):
    """Strip the frame of a plot, leaving only the bottom spine visible.

    Hides the top, left and right spines and removes every tick from the
    y-axis.

    :param ax: object to restyle; it must expose ``spines`` (indexable by
        ``'top'``, ``'left'`` and ``'right'``) and ``get_yaxis()``. When
        omitted, the module-level ``fig`` is used, which is what this
        function always did before the parameter existed.
    :return: None
    """
    if ax is None:
        # backward-compatible default: whatever is currently bound to ``fig``
        ax = fig
    for side in ('top', 'left', 'right'):
        ax.spines[side].set_visible(False)
    ax.get_yaxis().set_ticks([])
    # ax.axis('off')  # alternative kept from the original: hide the whole frame
|
||||||
|
|
||||||
|
nbins = 50
|
||||||
|
figsize = (5, 2)
|
||||||
|
ymax = 0.2
|
||||||
|
|
||||||
|
negatives = np.random.normal(loc = 0.3, scale=0.2, size=20000)
|
||||||
|
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])
|
||||||
|
|
||||||
|
plt.figure(figsize=figsize)
|
||||||
|
plt.xlim(0, 1)
|
||||||
|
plt.ylim(0, ymax)
|
||||||
|
fig = sns.histplot(data=negatives, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
|
||||||
|
plt.title('Negative distribution')
|
||||||
|
fig.set(yticklabels=[])
|
||||||
|
fig.set(ylabel=None)
|
||||||
|
setframe()
|
||||||
|
# fig.get_figure().savefig('plots_cacm/negatives.pdf')
|
||||||
|
# plt.clf()
|
||||||
|
|
||||||
|
# -------------------------------------------------------------
|
||||||
|
|
||||||
|
positives1 = np.random.normal(loc = 0.75, scale=0.06, size=20000)
|
||||||
|
positives2 = np.random.normal(loc = 0.65, scale=0.1, size=1)
|
||||||
|
positives = np.concatenate([positives1, positives2])
|
||||||
|
np.random.shuffle(positives)
|
||||||
|
positives = np.asarray([x for x in positives if 0 <= x <= 1])
|
||||||
|
|
||||||
|
# plt.figure(figsize=figsize)
|
||||||
|
plt.xlim(0, 1)
|
||||||
|
plt.ylim(0, ymax)
|
||||||
|
fig = sns.histplot(data=positives, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
|
||||||
|
plt.title('')
|
||||||
|
fig.set(yticklabels=[])
|
||||||
|
fig.set(ylabel=None)
|
||||||
|
setframe()
|
||||||
|
fig.get_figure().savefig('plots_cacm/training.pdf')
|
||||||
|
|
||||||
|
# -------------------------------------------------------------
|
||||||
|
|
||||||
|
prev = 0.2
|
||||||
|
test = np.concatenate([
|
||||||
|
negatives[:int(len(negatives)*(1-prev))],
|
||||||
|
positives[:int(len(positives)*(prev))],
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
plt.figure(figsize=figsize)
|
||||||
|
plt.xlim(0, 1)
|
||||||
|
plt.ylim(0, ymax)
|
||||||
|
fig = sns.histplot(data=test, binrange=(0,1), bins=nbins, stat='probability', color=next(palette))
|
||||||
|
plt.title('')
|
||||||
|
fig.set(yticklabels=[])
|
||||||
|
fig.set(ylabel=None)
|
||||||
|
setframe()
|
||||||
|
fig.get_figure().savefig('plots_cacm/test.pdf')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,86 @@
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
from method.non_aggregative import DMx
|
||||||
|
from protocol import APP
|
||||||
|
from quapy.method.aggregative import CC, ACC, DMy
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 100
|
||||||
|
DATASETS = qp.datasets.UCI_DATASETS[10:]
|
||||||
|
|
||||||
|
def fit_eval_task(args):
    """Fit a copy of one quantifier and evaluate it on the test collection.

    :param args: a 4-tuple ``(model_name, model, train, test)``
    :return: a tuple ``(model_name, true_prev, estim_prev)``, where the last
        two items are the outputs of ``qp.evaluation.prediction`` over an
        ``APP`` protocol with 100 repeats and ``random_state=0``
    """
    name, quantifier, training, testing = args
    with qp.util.temp_seed(0):
        # fit a private copy, so the model received in args is left untouched
        fitted = deepcopy(quantifier)
        fitted.fit(training)
        protocol = APP(testing, repeats=100, random_state=0)
        true_prev, estim_prev = qp.evaluation.prediction(fitted, protocol)
    return name, true_prev, estim_prev
|
||||||
|
|
||||||
|
|
||||||
|
def gen_data():
    """Fit and evaluate every quantifier on every dataset listed in DATASETS.

    For each dataset, the four quantifiers (CC, ACC, HDy, HDx) are
    instantiated anew and dispatched to ``fit_eval_task`` through
    ``qp.util.parallel``.

    :return: four parallel lists ``(method_names, true_prevs, estim_prevs,
        tr_prevs)`` holding one entry per (dataset, method) pair
    """

    def new_classifier():
        return LogisticRegression()
        #return LinearSVC(class_weight='balanced')

    def quantifiers():
        yield 'CC', CC(new_classifier())
        yield 'ACC', ACC(new_classifier())
        yield 'HDy', DMy(new_classifier(), val_split=10, nbins=10, n_jobs=-1)
        yield 'HDx', DMx(nbins=10, n_jobs=-1)

    # train, test = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=10).train_test
    names, trues, estims, train_prevs = [], [], [], []

    for dataset_name in DATASETS:
        train, test = qp.datasets.fetch_UCIDataset(dataset_name).train_test
        print(dataset_name, train.X.shape)

        tasks = ((name, quantifier, train, test) for name, quantifier in quantifiers())
        results = qp.util.parallel(fit_eval_task, tasks, seed=0, n_jobs=-1)

        for name, true_prev, estim_prev in results:
            names.append(name)
            trues.append(true_prev)
            estims.append(estim_prev)
            # the training prevalence is repeated once per method of the dataset
            train_prevs.append(train.prevalence())

    return names, trues, estims, train_prevs
|
||||||
|
|
||||||
|
method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource('../quick_experiment/pickled_plot_data.pkl', gen_data)
|
||||||
|
|
||||||
|
def remove_dataset(dataset_order, num_methods=4):
    """Discard the results of the datasets whose positions are in dataset_order.

    Reads the module-level ``method_names``, ``true_prevs``, ``estim_prevs``
    and ``tr_prevs``. The entry at index ``i`` is attributed to the dataset at
    position ``i // num_methods``.

    :param dataset_order: collection of dataset positions to drop
    :param num_methods: number of consecutive entries belonging to one dataset
    :return: four numpy arrays (names, true prevalences, estimated
        prevalences, training prevalences) restricted to the retained entries
    """
    rows = zip(method_names, true_prevs, estim_prevs, tr_prevs)
    kept = [
        row
        for position, row in enumerate(rows)
        if position // num_methods not in dataset_order
    ]
    kept_names = [row[0] for row in kept]
    kept_true = [row[1] for row in kept]
    kept_estim = [row[2] for row in kept]
    kept_tr = [row[3] for row in kept]
    return np.asarray(kept_names), np.asarray(kept_true), np.asarray(kept_estim), np.asarray(kept_tr)
|
||||||
|
|
||||||
|
print(DATASETS)
|
||||||
|
selected = 10
|
||||||
|
for i in [selected]:
|
||||||
|
print(i, DATASETS[i])
|
||||||
|
all_ = set(range(len(DATASETS)))
|
||||||
|
remove_index = sorted(all_ - {i})
|
||||||
|
sel_names, sel_true, sel_estim, sel_tr = remove_dataset(dataset_order=remove_index, num_methods=4)
|
||||||
|
|
||||||
|
p=sel_tr[0][1]
|
||||||
|
sel_names = ['CC$_{'+str(p)+'}$' if x=='CC' else x for x in sel_names]
|
||||||
|
|
||||||
|
# qp.plot.binary_diagonal(sel_names, sel_true, sel_estim, train_prev=sel_tr[0], show_std=False, savepath=f'./plots/bin_diag_{i}.png')
|
||||||
|
qp.plot.error_by_drift(sel_names, sel_true, sel_estim, sel_tr, n_bins=10, savepath=f'./plots/err_drift_{i}.png', show_std=True, show_density=False, title="")
|
||||||
|
# qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./plots/bin_bias.png')
|
||||||
|
# qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, nbins=3, savepath='./plots/bin_bias_bin.png')
|
||||||
|
|
@ -0,0 +1,62 @@
|
||||||
|
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.model_selection import train_test_split, cross_val_predict
|
||||||
|
from sklearn.neighbors import KernelDensity
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from data import LabelledCollection
|
||||||
|
|
||||||
|
scale = 100
|
||||||
|
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
|
||||||
|
negatives = np.random.normal(loc = 0.2, scale=0.2, size=20000)
|
||||||
|
negatives = np.asarray([x for x in negatives if 0 <= x <= 1])
|
||||||
|
|
||||||
|
positives = np.random.normal(loc = 0.75, scale=0.05, size=20000)
|
||||||
|
positives = np.asarray([x for x in positives if 0 <= x <= 1])
|
||||||
|
|
||||||
|
prev = 0.1
|
||||||
|
test = np.concatenate([
|
||||||
|
negatives[:int(len(negatives)*(1-prev))],
|
||||||
|
positives[:int(len(positives)*(prev))],
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
nbins = 30
|
||||||
|
|
||||||
|
plt.rcParams.update({'font.size': 7})
|
||||||
|
|
||||||
|
fig = plt.figure()
|
||||||
|
positions = np.asarray([2,1,0])
|
||||||
|
colors = ['r', 'g', 'b']
|
||||||
|
|
||||||
|
|
||||||
|
ax = fig.add_subplot(111, projection='3d')
|
||||||
|
ax.set_box_aspect((3, 1, 0.8))
|
||||||
|
|
||||||
|
for post, c, z in zip([test, positives, negatives], colors, positions):
|
||||||
|
|
||||||
|
hist, bins = np.histogram(post, bins=np.linspace(0,1, nbins+1), density=True)
|
||||||
|
xs = (bins[:-1] + bins[1:])/2
|
||||||
|
|
||||||
|
ax.bar(xs, hist, width=1 / nbins, zs=z, zdir='y', color=c, ec=c, alpha=0.6)
|
||||||
|
|
||||||
|
|
||||||
|
ax.yaxis.set_ticks(positions)
|
||||||
|
ax.yaxis.set_ticklabels([' '*20+'Test distribution', ' '*20+'Positive distribution', ' '*20+'Negative distribution'])
|
||||||
|
# ax.xaxis.set_ticks([])
|
||||||
|
# ax.xaxis.set_ticklabels([], minor=True)
|
||||||
|
ax.zaxis.set_ticks([])
|
||||||
|
ax.zaxis.set_ticklabels([], minor=True)
|
||||||
|
|
||||||
|
|
||||||
|
#plt.figure(figsize=(10,6))
|
||||||
|
#plt.show()
|
||||||
|
plt.savefig('./histograms3d_CACM2023.pdf')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
from sklearn.decomposition import TruncatedSVD
|
||||||
|
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
from data import LabelledCollection
|
||||||
|
from method.non_aggregative import DMx
|
||||||
|
from protocol import APP
|
||||||
|
from quapy.method.aggregative import CC, DMy, ACC
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 500
|
||||||
|
|
||||||
|
def cls():
    """Build a fresh LogisticRegressionCV classifier (``Cs=10``, ``n_jobs=-1``)."""
    classifier = LogisticRegressionCV(Cs=10, n_jobs=-1)
    return classifier
|
||||||
|
|
||||||
|
def gen_methods():
    """Yield the ``(quantifier, label)`` pairs compared in this experiment.

    Every quantifier that needs a classifier receives its own instance from
    ``cls()``. The labels are the strings later passed to the plotting
    functions as method names.

    :return: a generator of ``(quantifier, label)`` tuples, in the order
        CC, ACC, HDy, HDx
    """
    # '\\%' is the same two characters (backslash, percent) the original '\%'
    # produced, but does not rely on an unrecognized escape sequence, which
    # recent Python versions warn about
    yield CC(cls()), 'CC$_{10' + '\\%}$'
    yield ACC(cls()), 'ACC'
    yield DMy(cls(), val_split=10, nbins=10, n_jobs=-1), 'HDy'
    yield DMx(nbins=10, n_jobs=-1), 'HDx'
|
||||||
|
|
||||||
|
def gen_data():
    """Train the methods of gen_methods() on an IMDb sample and evaluate them.

    A training sample of 5000 instances is drawn with ``random_state=0``.
    'HDx' is fitted and evaluated on a 5-component TruncatedSVD projection of
    the data; every other method works on the original representation.

    :return: a ``zip`` over the collected tuples, i.e., an iterable yielding
        the method names, the true prevalences, the estimated prevalences,
        and the training prevalences, in that order
    """
    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

    training_prevalence = 0.1
    training_size = 5000
    # since the problem is binary, it suffices to specify the negative prevalence, since the positive is constrained
    train_sample = train.sampling(training_size, 1-training_prevalence, random_state=0)

    collected = []
    for model, method_name in tqdm(gen_methods(), total=4):
        with qp.util.temp_seed(1):
            if method_name != 'HDx':
                fit_on, eval_on = train_sample, test
            else:
                # HDx is given a low-dimensional dense projection of the data
                svd = TruncatedSVD(n_components=5, random_state=0)
                Xtr, ytr = train_sample.Xy
                fit_on = LabelledCollection(svd.fit_transform(Xtr), ytr)
                Xte, yte = test.Xy
                eval_on = LabelledCollection(svd.transform(Xte), yte)

            model.fit(fit_on)
            protocol = APP(eval_on, repeats=100, random_state=0)
            true_prev, estim_prev = qp.evaluation.prediction(model, protocol)
            collected.append((method_name, true_prev, estim_prev, train_sample.prevalence()))

    return zip(*collected)
|
||||||
|
|
||||||
|
|
||||||
|
method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
|
||||||
|
|
||||||
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_cacm/bin_diag_4methods.pdf')
|
||||||
|
qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_cacm/err_drift_4methods.pdf', title='', show_density=False, show_std=True)
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
from protocol import APP
|
||||||
|
from quapy.method.aggregative import CC
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 500
|
||||||
|
|
||||||
|
def gen_data():
    """Evaluate CC trained at nine different training prevalence values.

    For each value in ``np.linspace(0.1, 0.9, 9)``, a sample of 5000 IMDb
    training instances is drawn, a ``CC(LogisticRegressionCV)`` quantifier is
    fitted on it, and its predictions are collected over an ``APP`` protocol
    (100 repeats, ``random_state=0``) on the test set.

    :return: a ``zip`` over the collected tuples, i.e., an iterable yielding
        the method names, the true prevalences, the estimated prevalences,
        and the training prevalences, in that order
    """
    train, test = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=5).train_test

    method_data = []
    for training_prevalence in tqdm(np.linspace(0.1, 0.9, 9), total=9):
        training_size = 5000
        # since the problem is binary, it suffices to specify the negative prevalence, since the positive is constrained
        train_sample = train.sampling(training_size, 1-training_prevalence)

        # cls = GridSearchCV(LinearSVC(), param_grid={'C': np.logspace(-2,2,5), 'class_weight':[None, 'balanced']}, n_jobs=-1)
        # cls = GridSearchCV(LogisticRegression(), param_grid={'C': np.logspace(-2, 2, 5), 'class_weight': [None, 'balanced']}, n_jobs=-1)
        # cls.fit(*train_sample.Xy)

        model = CC(LogisticRegressionCV(n_jobs=-1,Cs=10))

        model.fit(train_sample)
        true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))

        # round before converting: plain int() truncates, so a value such as
        # 29.999999999999996 would be labelled 29 instead of 30
        percent = int(round(100 * training_prevalence))
        # '\\%' is the same two characters the original '\%' produced, without
        # relying on an unrecognized escape sequence
        method_name = 'CC$_{' + f'{percent}' + '\\%}$'
        method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))

    return zip(*method_data)
|
||||||
|
|
||||||
|
|
||||||
|
method_names, true_prevs, estim_prevs, tr_prevs = gen_data()
|
||||||
|
|
||||||
|
qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, savepath='./plots_cacm/bin_diag_cc.pdf')
|
||||||
|
# qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=10, savepath='./plots_cacm/err_drift_cc.pdf', title='', show_density=False)
|
||||||
210
CHANGE_LOG.txt
|
|
@ -1,210 +0,0 @@
|
||||||
Change Log 0.2.0
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
- Base code Refactor:
|
|
||||||
- Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
|
|
||||||
def fit(data:LabelledCollection): -> def fit(X, y):
|
|
||||||
- Adding function "predict" (function "quantify" is still present as an alias, for the nostalgic)
|
|
||||||
- Aggregative methods' behavior in terms of fit_classifier and how to treat the val_split is now
|
|
||||||
indicated exclusively at construction time, and it is no longer possible to indicate it at fit time.
|
|
||||||
This is because, in v<=0.1.9, one could create a method (e.g., ACC) and then indicate:
|
|
||||||
my_acc.fit(tr_data, fit_classifier=False, val_split=val_data)
|
|
||||||
in which case the first argument is unused, and this was ambiguous with
|
|
||||||
my_acc.fit(the_data, fit_classifier=False)
|
|
||||||
in which case the_data is to be used for validation purposes. However, the val_split could be set as a fraction
|
|
||||||
indicating only part of the_data must be used for validation, and the rest wasted... it was certainly confusing.
|
|
||||||
- This change imposes a versioning constraint with qunfold, which now must be >= 0.1.6
|
|
||||||
- EMQ has been modified, so that the representation function "classify" now only provides posterior
|
|
||||||
probabilities and, if required, these are recalibrated (e.g., by "bcts") during the aggregation function.
|
|
||||||
- A new parameter "on_calib_error" is passed to the constructor, which informs of the policy to follow
|
|
||||||
in case the abstention's calibration functions failed (which happens sometimes). Options include:
|
|
||||||
- 'raise': raises a RuntimeException (default)
|
|
||||||
- 'backup': reruns by silently avoiding calibration
|
|
||||||
- Parameter "recalib" has been renamed "calib"
|
|
||||||
- Added aggregative bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex, or
|
|
||||||
ellipses in the CLR space). This method is efficient as it leverages the two-phases of the aggregative quantifiers.
|
|
||||||
This method applies resampling only to the aggregation phase, thus avoiding the need to train many quantifiers, or
|
|
||||||
classify multiple times the instances of a sample. See:
|
|
||||||
- quapy/method/confidence.py (new)
|
|
||||||
- the new example no. 16.confidence_regions.py
|
|
||||||
- BayesianCC moved to confidence.py, where methods having to do with confidence intervals belong.
|
|
||||||
- Improved documentation of qp.plot module.
|
|
||||||
|
|
||||||
|
|
||||||
Change Log 0.1.9
|
|
||||||
----------------
|
|
||||||
|
|
||||||
- Added LeQua 2024 datasets and normalized match distance to qp.error
|
|
||||||
|
|
||||||
- Improved data loaders for UCI binary and UCI multiclass datasets (thanks to Lorenzo Volpi!); these datasets
|
|
||||||
can be loaded with standardised covariates (default)
|
|
||||||
|
|
||||||
- Added a default classifier for aggregative quantifiers, which now can be instantiated without specifying
|
|
||||||
the classifier. The default classifier can be accessed in qp.environ['DEFAULT_CLS'] and is assigned to
|
|
||||||
sklearn.linear_model.LogisticRegression(max_iter=3000). If the classifier is not specified, then a clone
|
|
||||||
of said classifier is returned. E.g.:
|
|
||||||
> pacc = PACC()
|
|
||||||
is equivalent to:
|
|
||||||
> pacc = PACC(classifier=LogisticRegression(max_iter=3000))
|
|
||||||
|
|
||||||
- Improved error logging in model selection. In v0.1.8 only Status.INVALID was reported; in v0.1.9 it is
|
|
||||||
now accompanied by a textual description of the error
|
|
||||||
|
|
||||||
- The number of parallel workers can now be set via an environment variable by running, e.g.:
|
|
||||||
> N_JOBS=10 python3 your_script.py
|
|
||||||
which has the same effect as writing the following code at the beginning of your_script.py:
|
|
||||||
> import quapy as qp
|
|
||||||
> qp.environ["N_JOBS"] = 10
|
|
||||||
|
|
||||||
- Some examples have been added to the ./examples/ dir, which now contains numbered examples from basics (0)
|
|
||||||
to advanced topics (higher numbers)
|
|
||||||
|
|
||||||
- Moved the wiki documents to the ./docs/ folder so that they become editable via PR for the community
|
|
||||||
|
|
||||||
- Added Composable methods from Mirko Bunse's qunfold library! (thanks to Mirko Bunse!)
|
|
||||||
|
|
||||||
- Added Continuous Integration with GitHub Actions (thanks to Mirko Bunse!)
|
|
||||||
|
|
||||||
- Added Bayesian CC method (thanks to Pawel Czyz!). The method is described in detail in the paper
|
|
||||||
Ziegler, Albert, and Paweł Czyż. "Bayesian Quantification with Black-Box Estimators."
|
|
||||||
arXiv preprint arXiv:2302.09159 (2023).
|
|
||||||
|
|
||||||
- Removed binary UCI datasets {acute.a, acute.b, balance.2} from the list qp.data.datasets.UCI_BINARY_DATASETS
|
|
||||||
(the datasets are still loadable from the fetch_UCIBinaryLabelledCollection and fetch_UCIBinaryDataset
|
|
||||||
functions, though). The reason is that these datasets tend to yield results (for all methods) that are
|
|
||||||
one or two orders of magnitude greater than for other datasets, and this has a disproportionate impact in
|
|
||||||
methods average (I suspect there is something wrong in those datasets).
|
|
||||||
|
|
||||||
|
|
||||||
Change Log 0.1.8
|
|
||||||
----------------
|
|
||||||
|
|
||||||
- Added Kernel Density Estimation methods (KDEyML, KDEyCS, KDEyHD) as proposed in the paper:
|
|
||||||
Moreo, A., González, P., & del Coz, J. J. Kernel Density Estimation for Multiclass Quantification.
|
|
||||||
arXiv preprint arXiv:2401.00490, 2024
|
|
||||||
|
|
||||||
- Substantial internal refactor: aggregative methods now inherit a pattern by which the fit method consists of:
|
|
||||||
a) fitting the classifier and returning the representations of the training instances (typically the posterior
|
|
||||||
probabilities, the label predictions, or the classifier scores, and typically obtained through kFCV).
|
|
||||||
b) fitting an aggregation function
|
|
||||||
The function implemented in step a) is inherited from the super class. Each new aggregative method now has to
|
|
||||||
implement only the "aggregative_fit" of step b).
|
|
||||||
This pattern was already implemented for the prediction (thus allowing evaluation functions to be performed
|
|
||||||
very quickly), and is now available also for training. The main benefit is that model selection now can nest
|
|
||||||
the training of quantifiers in two levels: one for the classifier, and another for the aggregation function.
|
|
||||||
As a result, a method with a param grid of 10 combinations for the classifier and 10 combinations for the
|
|
||||||
quantifier, now implies 10 trainings of the classifier + 10*10 trainings of the aggregation function (this is
|
|
||||||
typically much faster than the classifier training), whereas in versions <0.1.8 this amounted to training
|
|
||||||
10*10 (classifiers+aggregations).
|
|
||||||
|
|
||||||
- Added different solvers for ACC and PACC quantifiers. In quapy < 0.1.8 these quantifiers try to solve the system
|
|
||||||
of equations Ax=B exactly (by means of np.linalg.solve). As noted by Mirko Bunse (thanks!), such an exact solution
|
|
||||||
does sometimes not exist. In cases like this, quapy < 0.1.8 resorted to CC for providing a plausible solution.
|
|
||||||
ACC and PACC now resort to an approximate solution in such cases (minimizing the L2-norm of the difference
|
|
||||||
between Ax and B) as proposed by Mirko Bunse. A quick experiment reveals this heuristic greatly improves the results
|
|
||||||
of ACC and PACC in T2A@LeQua.
|
|
||||||
|
|
||||||
- Fixed ThresholdOptimization methods (X, T50, MAX, MS and MS2). Thanks to Tobias Schumacher and colleagues for pointing
|
|
||||||
this out in Appendix A of "Schumacher, T., Strohmaier, M., & Lemmerich, F. (2021). A comparative evaluation of
|
|
||||||
quantification methods. arXiv:2103.03223v3 [cs.LG]"
|
|
||||||
|
|
||||||
- Added HDx and DistributionMatchingX to non-aggregative quantifiers (see also the new example "comparing_HDy_HDx.py")
|
|
||||||
|
|
||||||
- New UCI multiclass datasets added (thanks to Pablo González). The 5 UCI multiclass datasets are those corresponding
|
|
||||||
to the following criteria:
|
|
||||||
- >1000 instances
|
|
||||||
- >2 classes
|
|
||||||
- classification datasets
|
|
||||||
- Python API available
|
|
||||||
|
|
||||||
- New IFCB (plankton) dataset added (thanks to Pablo González). See qp.datasets.fetch_IFCB.
|
|
||||||
|
|
||||||
- Added new evaluation measures NAE, NRAE (thanks to Andrea Esuli)
|
|
||||||
|
|
||||||
- Added new meta method "MedianEstimator"; an ensemble of binary base quantifiers that receives as input a dictionary
|
|
||||||
of hyperparameters that will explore exhaustively, fitting and generating predictions for each combination of
|
|
||||||
hyperparameters, and that returns, as the prevalence estimates, the median across all predictions.
|
|
||||||
|
|
||||||
- Added "custom_protocol.py" example.
|
|
||||||
|
|
||||||
- New API documentation template.
|
|
||||||
|
|
||||||
Change Log 0.1.7
|
|
||||||
----------------
|
|
||||||
|
|
||||||
- Protocols are now abstracted as instances of AbstractProtocol. There is a new class extending AbstractProtocol called
|
|
||||||
AbstractStochasticSeededProtocol, which implements a seeding policy to allow replicating the series of samplings.
|
|
||||||
There are some examples of protocols, APP, NPP, UPP, DomainMixer (experimental).
|
|
||||||
The idea is to start the sample generation by simply calling the __call__ method.
|
|
||||||
This change has a great impact in the framework, since many functions in qp.evaluation, qp.model_selection,
|
|
||||||
and sampling functions in LabelledCollection relied on the old functions. E.g., the functionality of
|
|
||||||
qp.evaluation.artificial_prevalence_report or qp.evaluation.natural_prevalence_report is now obtained by means of
|
|
||||||
qp.evaluation.report which takes a protocol as an argument. I have not maintained compatibility with the old
|
|
||||||
interfaces because I did not really like them. Check the wiki guide and the examples for more details.
|
|
||||||
|
|
||||||
- Exploration of hyperparameters in Model selection can now be run in parallel (there was a n_jobs argument in
|
|
||||||
QuaPy 0.1.6 but only the evaluation part for one specific hyperparameter was run in parallel).
|
|
||||||
|
|
||||||
- The prediction function has been refactored, so it applies the optimization for aggregative quantifiers (that
|
|
||||||
consists in pre-classifying all instances, and then only invoking aggregate on the samples) only in cases in
|
|
||||||
which the total number of classifications would be smaller than the number of classifications with the standard
|
|
||||||
procedure. The user can now specify "force", "auto", True or False, in order to actively decide whether to apply it
|
|
||||||
or not.
|
|
||||||
|
|
||||||
- examples directory created!
|
|
||||||
|
|
||||||
- DyS, Topsoe distance and binary search (thanks to Pablo González)
|
|
||||||
|
|
||||||
- Multi-thread reproducibility via seeding (thanks to Pablo González)
|
|
||||||
|
|
||||||
- n_jobs is now taken from the environment if set to None
|
|
||||||
|
|
||||||
- ACC, PACC, Forman's threshold variants have been parallelized.
|
|
||||||
|
|
||||||
- cross_val_predict (for quantification) added to model_selection: would be nice to allow the user specifies a
|
|
||||||
test protocol maybe, or None for bypassing it?
|
|
||||||
|
|
||||||
- Bugfix: adding two labelled collections (with +) now checks for consistency in the classes
|
|
||||||
|
|
||||||
- newer versions of numpy raise a warning when accessing types (e.g., np.float). I have replaced all such instances
|
|
||||||
with the plain python type (e.g., float).
|
|
||||||
|
|
||||||
- new dependency "abstention" (to add to the project requirements and setup). Calibration methods from
|
|
||||||
https://github.com/kundajelab/abstention added.
|
|
||||||
|
|
||||||
- the internal classifier of aggregative methods is now called "classifier" instead of "learner"
|
|
||||||
|
|
||||||
- when optimizing the hyperparameters of an aggregative quantifier, the classifier's specific hyperparameters
|
|
||||||
should be marked with a "classifier__" prefix (just like in scikit-learn with estimators), while the quantifier's
|
|
||||||
specific hyperparameters are named directly. For example, PCC(LogisticRegression()) quantifier has hyperparameters
|
|
||||||
"classifier__C", "classifier__class_weight", etc., instead of "C" and "class_weight" as in v0.1.6.
|
|
||||||
|
|
||||||
- hyperparameters yielding to inconsistent runs raise a ValueError exception, while hyperparameter combinations
|
|
||||||
yielding to internal errors of surrogate functions are reported and skipped, without stopping the grid search.
|
|
||||||
|
|
||||||
- DistributionMatching methods added. This is a general framework for distribution matching methods that caters for
|
|
||||||
multiclass quantification. That is to say, one could get a multiclass variant of the (originally binary) HDy
|
|
||||||
method aligned with Firat's formulation.
|
|
||||||
|
|
||||||
- internal method properties "binary", "aggregative", and "probabilistic" have been removed; these conditions are
|
|
||||||
checked via isinstance
|
|
||||||
|
|
||||||
- quantifiers (i.e., classes that inherit from BaseQuantifier) are not forced to implement classes_ or n_classes;
|
|
||||||
these can be used anyway internally, but the framework will not suppose (nor impose) that a quantifier implements
|
|
||||||
them
|
|
||||||
|
|
||||||
- qp.evaluation.prediction has been optimized so that, if a quantifier is of type aggregative, and if the evaluation
|
|
||||||
protocol is of type OnLabelledCollection, then the computation is faster. In this specific case, the predictions
|
|
||||||
are issued only once and for all, and not for each sample. An exception to this (which is also implemented) is
|
|
||||||
when the number of instances across all samples is anyway smaller than the number of instances in the original
|
|
||||||
labelled collection; in this case the heuristic is of no help, and is therefore not applied.
|
|
||||||
|
|
||||||
- the distinction between "classify" and "posterior_probabilities" has been removed in Aggregative quantifiers,
|
|
||||||
so that probabilistic classifiers return posterior probabilities, while non-probabilistic quantifiers
|
|
||||||
return crisp decisions.
|
|
||||||
|
|
||||||
- OneVsAll fixed. There are now two classes: a generic one OneVsAllGeneric that works with any quantifier (e.g.,
|
|
||||||
any instance of BaseQuantifier), and a subclass of it called OneVsAllAggregative which implements the
|
|
||||||
classify / aggregate interface. Both are instances of OneVsAll. There is a method getOneVsAll that returns the
|
|
||||||
best instance based on the type of quantifier.
|
|
||||||
|
|
||||||
53
README.md
|
|
@ -13,9 +13,9 @@ for facilitating the analysis and interpretation of the experimental results.
|
||||||
|
|
||||||
### Last updates:
|
### Last updates:
|
||||||
|
|
||||||
* Version 0.2.0 is released! major changes can be consulted [here](CHANGE_LOG.txt).
|
* Version 0.1.7 is released! major changes can be consulted [here](quapy/CHANGE_LOG.txt).
|
||||||
* The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/index.html)
|
* A detailed documentation is now available [here](https://hlt-isti.github.io/QuaPy/)
|
||||||
* Manuals are available [here](https://hlt-isti.github.io/QuaPy/manuals.html)
|
* The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/build/html/modules.html)
|
||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
|
|
||||||
|
|
@ -25,7 +25,7 @@ pip install quapy
|
||||||
|
|
||||||
### Cite QuaPy
|
### Cite QuaPy
|
||||||
|
|
||||||
If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research:
|
If you find QuaPy useful (and we hope you will), plese consider citing the original paper in your research:
|
||||||
|
|
||||||
```
|
```
|
||||||
@inproceedings{moreo2021quapy,
|
@inproceedings{moreo2021quapy,
|
||||||
|
|
@ -46,18 +46,19 @@ of the test set.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
training, test = qp.datasets.fetch_UCIBinaryDataset("yeast").train_test
|
dataset = qp.datasets.fetch_twitter('semeval16')
|
||||||
|
|
||||||
# create an "Adjusted Classify & Count" quantifier
|
# create an "Adjusted Classify & Count" quantifier
|
||||||
model = qp.method.aggregative.ACC()
|
model = qp.method.aggregative.ACC(LogisticRegression())
|
||||||
Xtr, ytr = training.Xy
|
model.fit(dataset.training)
|
||||||
model.fit(Xtr, ytr)
|
|
||||||
|
|
||||||
estim_prevalence = model.predict(test.X)
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
true_prevalence = test.prevalence()
|
true_prevalence = dataset.test.prevalence()
|
||||||
|
|
||||||
error = qp.error.mae(true_prevalence, estim_prevalence)
|
error = qp.error.mae(true_prevalence, estim_prevalence)
|
||||||
|
|
||||||
print(f'Mean Absolute Error (MAE)={error:.3f}')
|
print(f'Mean Absolute Error (MAE)={error:.3f}')
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -68,7 +69,7 @@ class prevalence of the training set. For this reason, any quantification model
|
||||||
should be tested across many samples, even ones characterized by class prevalence
|
should be tested across many samples, even ones characterized by class prevalence
|
||||||
values different or very different from those found in the training set.
|
values different or very different from those found in the training set.
|
||||||
QuaPy implements sampling procedures and evaluation protocols that automate this workflow.
|
QuaPy implements sampling procedures and evaluation protocols that automate this workflow.
|
||||||
See the [documentation](https://hlt-isti.github.io/QuaPy/manuals.html) for detailed examples.
|
See the [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki) for detailed examples.
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
|
|
@ -80,8 +81,7 @@ quantification methods based on structured output learning, HDy, QuaNet, quantif
|
||||||
* 32 UCI Machine Learning datasets.
|
* 32 UCI Machine Learning datasets.
|
||||||
* 11 Twitter quantification-by-sentiment datasets.
|
* 11 Twitter quantification-by-sentiment datasets.
|
||||||
* 3 product reviews quantification-by-sentiment datasets.
|
* 3 product reviews quantification-by-sentiment datasets.
|
||||||
* 4 tasks from LeQua 2022 competition and 4 tasks from LeQua 2024 competition
|
* 4 tasks from LeQua competition (_new in v0.1.7!_)
|
||||||
* IFCB for Plancton quantification
|
|
||||||
* Native support for binary and single-label multiclass quantification scenarios.
|
* Native support for binary and single-label multiclass quantification scenarios.
|
||||||
* Model selection functionality that minimizes quantification-oriented loss functions.
|
* Model selection functionality that minimizes quantification-oriented loss functions.
|
||||||
* Visualization tools for analysing the experimental results.
|
* Visualization tools for analysing the experimental results.
|
||||||
|
|
@ -96,29 +96,22 @@ quantification methods based on structured output learning, HDy, QuaNet, quantif
|
||||||
* pandas, xlrd
|
* pandas, xlrd
|
||||||
* matplotlib
|
* matplotlib
|
||||||
|
|
||||||
## Contributing
|
|
||||||
|
|
||||||
In case you want to contribute improvements to quapy, please generate pull request to the "devel" branch.
|
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
Check out the [developer API documentation here](https://hlt-isti.github.io/QuaPy/index.html).
|
The [developer API documentation](https://hlt-isti.github.io/QuaPy/build/html/modules.html) is available [here](https://hlt-isti.github.io/QuaPy/build/html/index.html).
|
||||||
|
|
||||||
Check out the [Manuals](https://hlt-isti.github.io/QuaPy/manuals.html), in which many code examples
|
Check out our [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki), in which many examples
|
||||||
are provided:
|
are provided:
|
||||||
|
|
||||||
* [Datasets](https://hlt-isti.github.io/QuaPy/manuals/datasets.html)
|
* [Datasets](https://github.com/HLT-ISTI/QuaPy/wiki/Datasets)
|
||||||
* [Evaluation](https://hlt-isti.github.io/QuaPy/manuals/evaluation.html)
|
* [Evaluation](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation)
|
||||||
* [Protocols](https://hlt-isti.github.io/QuaPy/manuals/protocols.html)
|
* [Protocols](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols)
|
||||||
* [Methods](https://hlt-isti.github.io/QuaPy/manuals/methods.html)
|
* [Methods](https://github.com/HLT-ISTI/QuaPy/wiki/Methods)
|
||||||
* [SVMperf](https://hlt-isti.github.io/QuaPy/manuals/explicit-loss-minimization.html)
|
* [SVMperf](https://github.com/HLT-ISTI/QuaPy/wiki/ExplicitLossMinimization)
|
||||||
* [Model Selection](https://hlt-isti.github.io/QuaPy/manuals/model-selection.html)
|
* [Model Selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection)
|
||||||
* [Plotting](https://hlt-isti.github.io/QuaPy/manuals/plotting.html)
|
* [Plotting](https://github.com/HLT-ISTI/QuaPy/wiki/Plotting)
|
||||||
|
|
||||||
## Acknowledgments:
|
## Acknowledgments:
|
||||||
|
|
||||||
<img src="docs/source/SoBigData.png" alt="SoBigData++" width="250"/>
|
<img src="SoBigData.png" alt="SoBigData++" width="250"/>
|
||||||
|
|
||||||
This work has been supported by the QuaDaSh project
|
|
||||||
_"Finanziato dall’Unione europea---Next Generation EU,
|
|
||||||
Missione 4 Componente 2 CUP B53D23026250001"_.
|
|
||||||
|
|
|
||||||
|
Before Width: | Height: | Size: 128 KiB After Width: | Height: | Size: 128 KiB |
113
TODO.txt
|
|
@ -1,26 +1,95 @@
|
||||||
Solve the warnings issue; right now there is a warning ignore in method/__init__.py:
|
ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now
|
||||||
|
works with protocols it would need to know the validation set in order to pass something like
|
||||||
|
"protocol: APP(val, etc.)"
|
||||||
|
sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
|
||||||
|
clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.)
|
||||||
|
make truly parallel the GridSearchQ
|
||||||
|
make more examples in the "examples" directory
|
||||||
|
merge with master, because I had to fix some problems with QuaNet due to an issue notified via GitHub!
|
||||||
|
added cross_val_predict in qp.model_selection (i.e., a cross_val_predict for quantification) --would be nice to have
|
||||||
|
it parallelized
|
||||||
|
|
||||||
Add 'platt' to calib options in EMQ?
|
check the OneVsAll module(s)
|
||||||
|
|
||||||
Allow n_prevpoints in APP to be specified by a user-defined grid?
|
check the set_params of neural.py, because the separation of estimator__<param> is not implemented; see also
|
||||||
|
__check_params_colision
|
||||||
|
|
||||||
|
HDy can be customized so that the number of bins is specified, instead of explored within the fit method
|
||||||
|
|
||||||
|
Packaging:
|
||||||
|
==========================================
|
||||||
|
Document methods with paper references
|
||||||
|
unit-tests
|
||||||
|
clean wiki_examples!
|
||||||
|
|
||||||
|
Refactor:
|
||||||
|
==========================================
|
||||||
|
Unify ThresholdOptimization methods, as an extension of PACC (and not ACC), the fit methods are almost identical and
|
||||||
|
use a prob classifier (take into account that PACC uses pcc internally, whereas the threshold methods use cc
|
||||||
|
instead). The fit method of ACC and PACC has a block for estimating the validation estimates that should be unified
|
||||||
|
as well...
|
||||||
|
Refactor protocols. APP and NPP related functionalities are duplicated in functional, LabelledCollection, and evaluation
|
||||||
|
|
||||||
|
|
||||||
|
New features:
|
||||||
|
==========================================
|
||||||
|
Add "measures for evaluating ordinal"?
|
||||||
|
Add datasets for topic.
|
||||||
|
Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top?
|
||||||
|
|
||||||
|
Current issues:
|
||||||
|
==========================================
|
||||||
|
Revise the class structure of quantification methods and the methods they inherit... There is some confusion regarding
|
||||||
|
methods isbinary, isprobabilistic, and the like. The attribute "learner_" in aggregative quantifiers is also
|
||||||
|
confusing, since there is a getter and a setter.
|
||||||
|
Remove the "deep" in get_params. There is no real compatibility with scikit-learn as for now.
|
||||||
|
SVMperf-based learners do not remove temp files in __del__?
|
||||||
|
In binary quantification (hp, kindle, imdb) we used F1 in the minority class (which in kindle and hp happens to be the
|
||||||
|
negative class). This is not covered in this new implementation, in which the binary case is not treated as such, but as
|
||||||
|
an instance of single-label with 2 labels. Check
|
||||||
|
Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps)
|
||||||
|
OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers
|
||||||
|
Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed
|
||||||
|
Add random seed management to support replicability (see temp_seed in util.py).
|
||||||
|
GridSearchQ is not truly parallelized. It only parallelizes on the predictions.
|
||||||
|
In the context of a quantifier (e.g., QuaNet or CC), the parameters of the learner should be prefixed with "estimator__",
|
||||||
|
in QuaNet this is resolved with a __check_params_colision, but this should be improved. It might be cumbersome to
|
||||||
|
impose the "estimator__" prefix for, e.g., quantifiers like CC though... This should be changed everywhere...
|
||||||
|
QuaNet needs refactoring. The base quantifiers ACC and PACC receive val_data with instances already transformed. This
|
||||||
|
issue is due to a bad design.
|
||||||
|
|
||||||
|
Improvements:
|
||||||
|
==========================================
|
||||||
|
Explore the hyperparameter "number of bins" in HDy
|
||||||
|
Rename EMQ to SLD ?
|
||||||
|
Parallelize the kFCV in ACC and PACC?
|
||||||
|
Parallelize model selection trainings
|
||||||
|
We might want to think of (improving and) adding the class Tabular (it is defined and used on branch tweetsent). A more
|
||||||
|
recent version is in the project ql4facct. This class is meant to generate latex tables from results (highlighting
|
||||||
|
best results, computing statistical tests, colouring cells, producing rankings, producing averages, etc.). Trying
|
||||||
|
to generate tables is typically a bad idea, but in this specific case we do have pretty good control of what an
|
||||||
|
experiment looks like. (Do we want to abstract experimental results? this could be useful not only for tables but
|
||||||
|
also for plots).
|
||||||
|
Add proper logging system. Currently we use print
|
||||||
|
It might be good to simplify the number of methods that have to be implemented for any new Quantifier. At the moment,
|
||||||
|
there are many functions like get_params, set_params, and, especially, @property classes_, which are cumbersome to
|
||||||
|
implement for quick experiments. A possible solution is to impose get_params and set_params only in cases in which
|
||||||
|
the model extends some "ModelSelectable" interface only. The classes_ should have a default implementation.
|
||||||
|
|
||||||
|
Checks:
|
||||||
|
==========================================
|
||||||
|
How many times is the system of equations for ACC and PACC not solved? How many times is it clipped? Do they sum up
|
||||||
|
to one always?
|
||||||
|
Re-check how hyperparameters from the quantifier and hyperparameters from the classifier (in aggregative quantifiers)
|
||||||
|
are handled. In scikit-learn the hyperparameters from a wrapper method are indicated directly whereas the hyperparams
|
||||||
|
from the internal learner are prefixed with "estimator__". In QuaPy, combinations having to do with the classifier
|
||||||
|
can be computed at the beginning, and then in an internal loop the hyperparams of the quantifier can be explored,
|
||||||
|
passing fit_learner=False.
|
||||||
|
Re-check Ensembles. As for now, they are strongly tied to aggregative quantifiers.
|
||||||
|
Re-think the environment variables. Maybe add new ones (like, for example, parameters for the plots)
|
||||||
|
Do we want to wrap prevalences (currently simple np.ndarray) as a class? This might be convenient for some interfaces
|
||||||
|
(e.g., for specifying artificial prevalences in samplings, for printing them -- currently supported through
|
||||||
|
F.strprev(), etc.). This might, however, add some overhead, and hinder post-processing with numpy.
|
||||||
|
Would be nice to get a better integration with sklearn.
|
||||||
|
|
||||||
Add the fix suggested by Alexander?
|
|
||||||
"For a more general application, I would maybe first establish a per-class threshold value of plausible prevalence
|
|
||||||
based on the number of actual positives and the required sample size; e.g., for sample_size=100 and actual
|
|
||||||
positives [10, 100, 500] -> [0.1, 1.0, 1.0], meaning that class 0 can be sampled at most at 0.1 prevalence, while
|
|
||||||
the others can be sampled up to 1. prevalence. Then, when a prevalence value is requested, e.g., [0.33, 0.33, 0.33],
|
|
||||||
we may either clip each value and normalize (as you suggest for the extreme case, e.g., [0.1, 0.33, 0.33]/sum) or
|
|
||||||
scale each value by per-class thresholds, i.e., [0.33*0.1, 0.33*1, 0.33*1]/sum."
|
|
||||||
- This affects LabelledCollection
|
|
||||||
- This functionality should be accessible via sampling protocols and evaluation functions
|
|
||||||
|
|
||||||
- [TODO] document confidence in manuals
|
|
||||||
- [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
|
|
||||||
- [TODO] Add EDy (an implementation is available at quantificationlib)
|
|
||||||
- [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
|
|
||||||
- [TODO] add HistNetQ
|
|
||||||
- [TODO] add CDE-iteration and Bayes-CDE methods
|
|
||||||
- [TODO] add Friedman's method and DeBias
|
|
||||||
- [TODO] check ignore warning stuff
|
|
||||||
check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
|
|
||||||
- [TODO] nmd and md are not selectable from qp.evaluation.evaluate as a string
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
build/
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
# Minimal makefile for Sphinx documentation
|
|
||||||
#
|
|
||||||
|
|
||||||
# You can set these variables from the command line, and also
|
|
||||||
# from the environment for the first two.
|
|
||||||
SPHINXOPTS ?=
|
|
||||||
SPHINXBUILD ?= sphinx-build
|
|
||||||
SOURCEDIR = source
|
|
||||||
BUILDDIR = build
|
|
||||||
|
|
||||||
# Put it first so that "make" without argument is like "make help".
|
|
||||||
help:
|
|
||||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
||||||
|
|
||||||
.PHONY: help Makefile
|
|
||||||
|
|
||||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
|
||||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
|
||||||
%: Makefile
|
|
||||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
|
||||||
|
|
@ -0,0 +1,831 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Datasets — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="Evaluation" href="Evaluation.html" />
|
||||||
|
<link rel="prev" title="Installation" href="Installation.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Evaluation.html" title="Evaluation"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Installation.html" title="Installation"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Datasets</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="datasets">
|
||||||
|
<h1>Datasets<a class="headerlink" href="#datasets" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>QuaPy makes available several datasets that have been used in
|
||||||
|
quantification literature, as well as an interface to allow
|
||||||
|
anyone import their custom datasets.</p>
|
||||||
|
<p>A <em>Dataset</em> object in QuaPy is roughly a pair of <em>LabelledCollection</em> objects,
|
||||||
|
one playing the role of the training set, another the test set.
|
||||||
|
<em>LabelledCollection</em> is a data class consisting of the (iterable)
|
||||||
|
instances and labels. This class handles most of the sampling functionality in QuaPy.
|
||||||
|
Take a look at the following code:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||||
|
|
||||||
|
<span class="n">instances</span> <span class="o">=</span> <span class="p">[</span>
|
||||||
|
<span class="s1">'1st positive document'</span><span class="p">,</span> <span class="s1">'2nd positive document'</span><span class="p">,</span>
|
||||||
|
<span class="s1">'the only negative document'</span><span class="p">,</span>
|
||||||
|
<span class="s1">'1st neutral document'</span><span class="p">,</span> <span class="s1">'2nd neutral document'</span><span class="p">,</span> <span class="s1">'3rd neutral document'</span>
|
||||||
|
<span class="p">]</span>
|
||||||
|
<span class="n">labels</span> <span class="o">=</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
|
||||||
|
|
||||||
|
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">(),</span> <span class="n">prec</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Output the class prevalences (showing 2 digit precision):</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span><span class="mf">0.17</span><span class="p">,</span> <span class="mf">0.50</span><span class="p">,</span> <span class="mf">0.33</span><span class="p">]</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>One can easily produce new samples at desired class prevalence values:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">sample_size</span> <span class="o">=</span> <span class="mi">10</span>
|
||||||
|
<span class="n">prev</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.4</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">]</span>
|
||||||
|
<span class="n">sample</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prev</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="s1">'instances:'</span><span class="p">,</span> <span class="n">sample</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="s1">'labels:'</span><span class="p">,</span> <span class="n">sample</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="s1">'prevalence:'</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">sample</span><span class="o">.</span><span class="n">prevalence</span><span class="p">(),</span> <span class="n">prec</span><span class="o">=</span><span class="mi">2</span><span class="p">))</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Which outputs:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">instances</span><span class="p">:</span> <span class="p">[</span><span class="s1">'the only negative document'</span> <span class="s1">'2nd positive document'</span>
|
||||||
|
<span class="s1">'2nd positive document'</span> <span class="s1">'2nd neutral document'</span> <span class="s1">'1st positive document'</span>
|
||||||
|
<span class="s1">'the only negative document'</span> <span class="s1">'the only negative document'</span>
|
||||||
|
<span class="s1">'the only negative document'</span> <span class="s1">'2nd positive document'</span>
|
||||||
|
<span class="s1">'1st positive document'</span><span class="p">]</span>
|
||||||
|
<span class="n">labels</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span> <span class="mi">2</span> <span class="mi">2</span> <span class="mi">1</span> <span class="mi">2</span> <span class="mi">0</span> <span class="mi">0</span> <span class="mi">0</span> <span class="mi">2</span> <span class="mi">2</span><span class="p">]</span>
|
||||||
|
<span class="n">prevalence</span><span class="p">:</span> <span class="p">[</span><span class="mf">0.40</span><span class="p">,</span> <span class="mf">0.10</span><span class="p">,</span> <span class="mf">0.50</span><span class="p">]</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Samples can be made consistent across different runs (e.g., to test
|
||||||
|
different methods on the same exact samples) by sampling and retaining
|
||||||
|
the indexes, that can then be used to generate the sample:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prev</span><span class="p">)</span>
|
||||||
|
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">methods</span><span class="p">:</span>
|
||||||
|
<span class="n">sample</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||||
|
<span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>However, generating samples for evaluation purposes is tackled in QuaPy
|
||||||
|
by means of the evaluation protocols (see the dedicated entries in the Wiki
|
||||||
|
for <a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation">evaluation</a> and
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Protocols">protocols</a>).</p>
|
||||||
|
<section id="reviews-datasets">
|
||||||
|
<h2>Reviews Datasets<a class="headerlink" href="#reviews-datasets" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>Three datasets of reviews about Kindle devices, Harry Potter’s series, and
|
||||||
|
the well-known IMDb movie reviews can be fetched using a unified interface.
|
||||||
|
For example:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>These datasets have been used in:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Esuli</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="p">,</span> <span class="n">Moreo</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="p">,</span> <span class="o">&</span> <span class="n">Sebastiani</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span> <span class="p">(</span><span class="mi">2018</span><span class="p">,</span> <span class="n">October</span><span class="p">)</span><span class="o">.</span>
|
||||||
|
<span class="n">A</span> <span class="n">recurrent</span> <span class="n">neural</span> <span class="n">network</span> <span class="k">for</span> <span class="n">sentiment</span> <span class="n">quantification</span><span class="o">.</span>
|
||||||
|
<span class="n">In</span> <span class="n">Proceedings</span> <span class="n">of</span> <span class="n">the</span> <span class="mi">27</span><span class="n">th</span> <span class="n">ACM</span> <span class="n">International</span> <span class="n">Conference</span> <span class="n">on</span>
|
||||||
|
<span class="n">Information</span> <span class="ow">and</span> <span class="n">Knowledge</span> <span class="n">Management</span> <span class="p">(</span><span class="n">pp</span><span class="o">.</span> <span class="mi">1775</span><span class="o">-</span><span class="mi">1778</span><span class="p">)</span><span class="o">.</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The list of reviews ids is available in:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">REVIEWS_SENTIMENT_DATASETS</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Some statistics of the available datasets are summarized below:</p>
|
||||||
|
<table class="docutils align-default">
|
||||||
|
<thead>
|
||||||
|
<tr class="row-odd"><th class="head"><p>Dataset</p></th>
|
||||||
|
<th class="head text-center"><p>classes</p></th>
|
||||||
|
<th class="head text-center"><p>train size</p></th>
|
||||||
|
<th class="head text-center"><p>test size</p></th>
|
||||||
|
<th class="head text-center"><p>train prev</p></th>
|
||||||
|
<th class="head text-center"><p>test prev</p></th>
|
||||||
|
<th class="head"><p>type</p></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr class="row-even"><td><p>hp</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>9533</p></td>
|
||||||
|
<td class="text-center"><p>18399</p></td>
|
||||||
|
<td class="text-center"><p>[0.018, 0.982]</p></td>
|
||||||
|
<td class="text-center"><p>[0.065, 0.935]</p></td>
|
||||||
|
<td><p>text</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>kindle</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>3821</p></td>
|
||||||
|
<td class="text-center"><p>21591</p></td>
|
||||||
|
<td class="text-center"><p>[0.081, 0.919]</p></td>
|
||||||
|
<td class="text-center"><p>[0.063, 0.937]</p></td>
|
||||||
|
<td><p>text</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>imdb</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>25000</p></td>
|
||||||
|
<td class="text-center"><p>25000</p></td>
|
||||||
|
<td class="text-center"><p>[0.500, 0.500]</p></td>
|
||||||
|
<td class="text-center"><p>[0.500, 0.500]</p></td>
|
||||||
|
<td><p>text</p></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</section>
|
||||||
|
<section id="twitter-sentiment-datasets">
|
||||||
|
<h2>Twitter Sentiment Datasets<a class="headerlink" href="#twitter-sentiment-datasets" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>11 Twitter datasets for sentiment analysis.
|
||||||
|
Text is not accessible, and the documents were made available
|
||||||
|
in tf-idf format. Each dataset presents two splits: a train/val
|
||||||
|
split for model selection purposes, and a train+val/test split
|
||||||
|
for model evaluation. The following code exemplifies how to load
|
||||||
|
a twitter dataset for model selection.</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'gasp'</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The datasets were used in:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Gao</span><span class="p">,</span> <span class="n">W</span><span class="o">.</span><span class="p">,</span> <span class="o">&</span> <span class="n">Sebastiani</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span> <span class="p">(</span><span class="mi">2015</span><span class="p">,</span> <span class="n">August</span><span class="p">)</span><span class="o">.</span>
|
||||||
|
<span class="n">Tweet</span> <span class="n">sentiment</span><span class="p">:</span> <span class="n">From</span> <span class="n">classification</span> <span class="n">to</span> <span class="n">quantification</span><span class="o">.</span>
|
||||||
|
<span class="n">In</span> <span class="mi">2015</span> <span class="n">IEEE</span><span class="o">/</span><span class="n">ACM</span> <span class="n">International</span> <span class="n">Conference</span> <span class="n">on</span> <span class="n">Advances</span> <span class="ow">in</span>
|
||||||
|
<span class="n">Social</span> <span class="n">Networks</span> <span class="n">Analysis</span> <span class="ow">and</span> <span class="n">Mining</span> <span class="p">(</span><span class="n">ASONAM</span><span class="p">)</span> <span class="p">(</span><span class="n">pp</span><span class="o">.</span> <span class="mi">97</span><span class="o">-</span><span class="mi">104</span><span class="p">)</span><span class="o">.</span> <span class="n">IEEE</span><span class="o">.</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Three of the datasets (semeval13, semeval14, and semeval15) share the
|
||||||
|
same training set (semeval), meaning that the training split one would get
|
||||||
|
when requesting any of them is the same. The dataset “semeval” can only
|
||||||
|
be requested with “for_model_selection=True”.
|
||||||
|
The lists of the Twitter datasets’ ids can be consulted in:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># a list of 11 dataset ids that can be used for model selection or model evaluation</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span>
|
||||||
|
|
||||||
|
<span class="c1"># 9 dataset ids in which "semeval13", "semeval14", and "semeval15" are replaced with "semeval"</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Some details can be found below:</p>
|
||||||
|
<table class="docutils align-default">
|
||||||
|
<thead>
|
||||||
|
<tr class="row-odd"><th class="head"><p>Dataset</p></th>
|
||||||
|
<th class="head text-center"><p>classes</p></th>
|
||||||
|
<th class="head text-center"><p>train size</p></th>
|
||||||
|
<th class="head text-center"><p>test size</p></th>
|
||||||
|
<th class="head text-center"><p>features</p></th>
|
||||||
|
<th class="head text-center"><p>train prev</p></th>
|
||||||
|
<th class="head text-center"><p>test prev</p></th>
|
||||||
|
<th class="head"><p>type</p></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr class="row-even"><td><p>gasp</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>8788</p></td>
|
||||||
|
<td class="text-center"><p>3765</p></td>
|
||||||
|
<td class="text-center"><p>694582</p></td>
|
||||||
|
<td class="text-center"><p>[0.421, 0.496, 0.082]</p></td>
|
||||||
|
<td class="text-center"><p>[0.407, 0.507, 0.086]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>hcr</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>1594</p></td>
|
||||||
|
<td class="text-center"><p>798</p></td>
|
||||||
|
<td class="text-center"><p>222046</p></td>
|
||||||
|
<td class="text-center"><p>[0.546, 0.211, 0.243]</p></td>
|
||||||
|
<td class="text-center"><p>[0.640, 0.167, 0.193]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>omd</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>1839</p></td>
|
||||||
|
<td class="text-center"><p>787</p></td>
|
||||||
|
<td class="text-center"><p>199151</p></td>
|
||||||
|
<td class="text-center"><p>[0.463, 0.271, 0.266]</p></td>
|
||||||
|
<td class="text-center"><p>[0.437, 0.283, 0.280]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>sanders</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>2155</p></td>
|
||||||
|
<td class="text-center"><p>923</p></td>
|
||||||
|
<td class="text-center"><p>229399</p></td>
|
||||||
|
<td class="text-center"><p>[0.161, 0.691, 0.148]</p></td>
|
||||||
|
<td class="text-center"><p>[0.164, 0.688, 0.148]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>semeval13</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>11338</p></td>
|
||||||
|
<td class="text-center"><p>3813</p></td>
|
||||||
|
<td class="text-center"><p>1215742</p></td>
|
||||||
|
<td class="text-center"><p>[0.159, 0.470, 0.372]</p></td>
|
||||||
|
<td class="text-center"><p>[0.158, 0.430, 0.412]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>semeval14</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>11338</p></td>
|
||||||
|
<td class="text-center"><p>1853</p></td>
|
||||||
|
<td class="text-center"><p>1215742</p></td>
|
||||||
|
<td class="text-center"><p>[0.159, 0.470, 0.372]</p></td>
|
||||||
|
<td class="text-center"><p>[0.109, 0.361, 0.530]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>semeval15</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>11338</p></td>
|
||||||
|
<td class="text-center"><p>2390</p></td>
|
||||||
|
<td class="text-center"><p>1215742</p></td>
|
||||||
|
<td class="text-center"><p>[0.159, 0.470, 0.372]</p></td>
|
||||||
|
<td class="text-center"><p>[0.153, 0.413, 0.434]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>semeval16</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>8000</p></td>
|
||||||
|
<td class="text-center"><p>2000</p></td>
|
||||||
|
<td class="text-center"><p>889504</p></td>
|
||||||
|
<td class="text-center"><p>[0.157, 0.351, 0.492]</p></td>
|
||||||
|
<td class="text-center"><p>[0.163, 0.341, 0.497]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>sst</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>2971</p></td>
|
||||||
|
<td class="text-center"><p>1271</p></td>
|
||||||
|
<td class="text-center"><p>376132</p></td>
|
||||||
|
<td class="text-center"><p>[0.261, 0.452, 0.288]</p></td>
|
||||||
|
<td class="text-center"><p>[0.207, 0.481, 0.312]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>wa</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>2184</p></td>
|
||||||
|
<td class="text-center"><p>936</p></td>
|
||||||
|
<td class="text-center"><p>248563</p></td>
|
||||||
|
<td class="text-center"><p>[0.305, 0.414, 0.281]</p></td>
|
||||||
|
<td class="text-center"><p>[0.282, 0.446, 0.272]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>wb</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>4259</p></td>
|
||||||
|
<td class="text-center"><p>1823</p></td>
|
||||||
|
<td class="text-center"><p>404333</p></td>
|
||||||
|
<td class="text-center"><p>[0.270, 0.392, 0.337]</p></td>
|
||||||
|
<td class="text-center"><p>[0.274, 0.392, 0.335]</p></td>
|
||||||
|
<td><p>sparse</p></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</section>
|
||||||
|
<section id="uci-machine-learning">
|
||||||
|
<h2>UCI Machine Learning<a class="headerlink" href="#uci-machine-learning" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>A set of 32 datasets from the <a class="reference external" href="https://archive.ics.uci.edu/ml/datasets.php">UCI Machine Learning repository</a>
|
||||||
|
used in:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Pérez</span><span class="o">-</span><span class="n">Gállego</span><span class="p">,</span> <span class="n">P</span><span class="o">.</span><span class="p">,</span> <span class="n">Quevedo</span><span class="p">,</span> <span class="n">J</span><span class="o">.</span> <span class="n">R</span><span class="o">.</span><span class="p">,</span> <span class="o">&</span> <span class="k">del</span> <span class="n">Coz</span><span class="p">,</span> <span class="n">J</span><span class="o">.</span> <span class="n">J</span><span class="o">.</span> <span class="p">(</span><span class="mi">2017</span><span class="p">)</span><span class="o">.</span>
|
||||||
|
<span class="n">Using</span> <span class="n">ensembles</span> <span class="k">for</span> <span class="n">problems</span> <span class="k">with</span> <span class="n">characterizable</span> <span class="n">changes</span>
|
||||||
|
<span class="ow">in</span> <span class="n">data</span> <span class="n">distribution</span><span class="p">:</span> <span class="n">A</span> <span class="n">case</span> <span class="n">study</span> <span class="n">on</span> <span class="n">quantification</span><span class="o">.</span>
|
||||||
|
<span class="n">Information</span> <span class="n">Fusion</span><span class="p">,</span> <span class="mi">34</span><span class="p">,</span> <span class="mi">87</span><span class="o">-</span><span class="mf">100.</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The list does not exactly coincide with that used in Pérez-Gállego et al. 2017
|
||||||
|
since we were unable to find the datasets with ids “diabetes” and “phoneme”.</p>
|
||||||
|
<p>These datasets can be loaded by calling, e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIDataset</span><span class="p">(</span><span class="s1">'yeast'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>This call will return a <em>Dataset</em> object in which the training and
|
||||||
|
test splits are randomly drawn, in a stratified manner, from the whole
|
||||||
|
collection at 70% and 30%, respectively. The <em>verbose=True</em> option indicates
|
||||||
|
that the dataset description should be printed in standard output.
|
||||||
|
The original data is not split,
|
||||||
|
and some papers submit the entire collection to a kFCV validation.
|
||||||
|
In order to accommodate these practices, one could first instantiate
|
||||||
|
the entire collection, and then create a generator that will return one
|
||||||
|
training+test dataset at a time, following a kFCV protocol:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="n">collection</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCILabelledCollection</span><span class="p">(</span><span class="s2">"yeast"</span><span class="p">)</span>
|
||||||
|
<span class="k">for</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">collection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">2</span><span class="p">):</span>
|
||||||
|
<span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The above code allows one to conduct a 2x5FCV evaluation on the “yeast” dataset.</p>
|
||||||
|
<p>All datasets come in numerical form (dense matrices); some statistics
|
||||||
|
are summarized below.</p>
|
||||||
|
<table class="docutils align-default">
|
||||||
|
<thead>
|
||||||
|
<tr class="row-odd"><th class="head"><p>Dataset</p></th>
|
||||||
|
<th class="head text-center"><p>classes</p></th>
|
||||||
|
<th class="head text-center"><p>instances</p></th>
|
||||||
|
<th class="head text-center"><p>features</p></th>
|
||||||
|
<th class="head text-center"><p>prev</p></th>
|
||||||
|
<th class="head"><p>type</p></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr class="row-even"><td><p>acute.a</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>120</p></td>
|
||||||
|
<td class="text-center"><p>6</p></td>
|
||||||
|
<td class="text-center"><p>[0.508, 0.492]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>acute.b</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>120</p></td>
|
||||||
|
<td class="text-center"><p>6</p></td>
|
||||||
|
<td class="text-center"><p>[0.583, 0.417]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>balance.1</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>625</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.539, 0.461]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>balance.2</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>625</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.922, 0.078]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>balance.3</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>625</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.539, 0.461]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>breast-cancer</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>683</p></td>
|
||||||
|
<td class="text-center"><p>9</p></td>
|
||||||
|
<td class="text-center"><p>[0.350, 0.650]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>cmc.1</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1473</p></td>
|
||||||
|
<td class="text-center"><p>9</p></td>
|
||||||
|
<td class="text-center"><p>[0.573, 0.427]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>cmc.2</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1473</p></td>
|
||||||
|
<td class="text-center"><p>9</p></td>
|
||||||
|
<td class="text-center"><p>[0.774, 0.226]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>cmc.3</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1473</p></td>
|
||||||
|
<td class="text-center"><p>9</p></td>
|
||||||
|
<td class="text-center"><p>[0.653, 0.347]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>ctg.1</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>2126</p></td>
|
||||||
|
<td class="text-center"><p>22</p></td>
|
||||||
|
<td class="text-center"><p>[0.222, 0.778]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>ctg.2</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>2126</p></td>
|
||||||
|
<td class="text-center"><p>22</p></td>
|
||||||
|
<td class="text-center"><p>[0.861, 0.139]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>ctg.3</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>2126</p></td>
|
||||||
|
<td class="text-center"><p>22</p></td>
|
||||||
|
<td class="text-center"><p>[0.917, 0.083]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>german</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>24</p></td>
|
||||||
|
<td class="text-center"><p>[0.300, 0.700]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>haberman</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>306</p></td>
|
||||||
|
<td class="text-center"><p>3</p></td>
|
||||||
|
<td class="text-center"><p>[0.735, 0.265]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>ionosphere</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>351</p></td>
|
||||||
|
<td class="text-center"><p>34</p></td>
|
||||||
|
<td class="text-center"><p>[0.641, 0.359]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>iris.1</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>150</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.667, 0.333]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>iris.2</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>150</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.667, 0.333]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>iris.3</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>150</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.667, 0.333]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>mammographic</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>830</p></td>
|
||||||
|
<td class="text-center"><p>5</p></td>
|
||||||
|
<td class="text-center"><p>[0.514, 0.486]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>pageblocks.5</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>5473</p></td>
|
||||||
|
<td class="text-center"><p>10</p></td>
|
||||||
|
<td class="text-center"><p>[0.979, 0.021]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>semeion</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1593</p></td>
|
||||||
|
<td class="text-center"><p>256</p></td>
|
||||||
|
<td class="text-center"><p>[0.901, 0.099]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>sonar</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>208</p></td>
|
||||||
|
<td class="text-center"><p>60</p></td>
|
||||||
|
<td class="text-center"><p>[0.534, 0.466]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>spambase</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>4601</p></td>
|
||||||
|
<td class="text-center"><p>57</p></td>
|
||||||
|
<td class="text-center"><p>[0.606, 0.394]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>spectf</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>267</p></td>
|
||||||
|
<td class="text-center"><p>44</p></td>
|
||||||
|
<td class="text-center"><p>[0.794, 0.206]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>tictactoe</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>958</p></td>
|
||||||
|
<td class="text-center"><p>9</p></td>
|
||||||
|
<td class="text-center"><p>[0.653, 0.347]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>transfusion</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>748</p></td>
|
||||||
|
<td class="text-center"><p>4</p></td>
|
||||||
|
<td class="text-center"><p>[0.762, 0.238]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>wdbc</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>569</p></td>
|
||||||
|
<td class="text-center"><p>30</p></td>
|
||||||
|
<td class="text-center"><p>[0.627, 0.373]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>wine.1</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>178</p></td>
|
||||||
|
<td class="text-center"><p>13</p></td>
|
||||||
|
<td class="text-center"><p>[0.669, 0.331]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>wine.2</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>178</p></td>
|
||||||
|
<td class="text-center"><p>13</p></td>
|
||||||
|
<td class="text-center"><p>[0.601, 0.399]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>wine.3</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>178</p></td>
|
||||||
|
<td class="text-center"><p>13</p></td>
|
||||||
|
<td class="text-center"><p>[0.730, 0.270]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>wine-q-red</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1599</p></td>
|
||||||
|
<td class="text-center"><p>11</p></td>
|
||||||
|
<td class="text-center"><p>[0.465, 0.535]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>wine-q-white</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>4898</p></td>
|
||||||
|
<td class="text-center"><p>11</p></td>
|
||||||
|
<td class="text-center"><p>[0.335, 0.665]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>yeast</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>1484</p></td>
|
||||||
|
<td class="text-center"><p>8</p></td>
|
||||||
|
<td class="text-center"><p>[0.711, 0.289]</p></td>
|
||||||
|
<td><p>dense</p></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<section id="issues">
|
||||||
|
<h3>Issues:<a class="headerlink" href="#issues" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>All datasets will be downloaded automatically the first time they are requested, and
|
||||||
|
stored in the <em>quapy_data</em> folder for faster further reuse.
|
||||||
|
However, some datasets require special actions that at the moment are not fully
|
||||||
|
automated.</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p>Datasets with ids “ctg.1”, “ctg.2”, and “ctg.3” (<em>Cardiotocography Data Set</em>) load
|
||||||
|
an Excel file, which requires the user to install the <em>xlrd</em> Python module in order
|
||||||
|
to open it.</p></li>
|
||||||
|
<li><p>The dataset with id “pageblocks.5” (<em>Page Blocks Classification (5)</em>) needs to
|
||||||
|
open a “unix compressed file” (extension .Z), which is not directly doable with
|
||||||
|
standard Python packages like gzip or zip. This file would need to be uncompressed using
|
||||||
|
OS-dependent software manually. Information on how to do it will be printed the first
|
||||||
|
time the dataset is invoked.</p></li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
<section id="lequa-datasets">
|
||||||
|
<h2>LeQua Datasets<a class="headerlink" href="#lequa-datasets" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>QuaPy also provides the datasets used for the LeQua competition.
|
||||||
|
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
|
||||||
|
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide
|
||||||
|
raw documents instead.
|
||||||
|
Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B
|
||||||
|
are multiclass quantification problems consisting of estimating the class prevalence
|
||||||
|
values of 28 different merchandise products.</p>
|
||||||
|
<p>Every task consists of a training set, a set of validation samples (for model selection)
|
||||||
|
and a set of test samples (for evaluation). QuaPy returns this data as a LabelledCollection
|
||||||
|
(training) and two generation protocols (for validation and test samples), as follows:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">training</span><span class="p">,</span> <span class="n">val_generator</span><span class="p">,</span> <span class="n">test_generator</span> <span class="o">=</span> <span class="n">fetch_lequa2022</span><span class="p">(</span><span class="n">task</span><span class="o">=</span><span class="n">task</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>See the <code class="docutils literal notranslate"><span class="pre">lequa2022_experiments.py</span></code> in the examples folder for further details on how to
|
||||||
|
carry out experiments using these datasets.</p>
|
||||||
|
<p>The datasets are downloaded only once, and stored for fast reuse.</p>
|
||||||
|
<p>Some statistics are summarized below:</p>
|
||||||
|
<table class="docutils align-default">
|
||||||
|
<thead>
|
||||||
|
<tr class="row-odd"><th class="head"><p>Dataset</p></th>
|
||||||
|
<th class="head text-center"><p>classes</p></th>
|
||||||
|
<th class="head text-center"><p>train size</p></th>
|
||||||
|
<th class="head text-center"><p>validation samples</p></th>
|
||||||
|
<th class="head text-center"><p>test samples</p></th>
|
||||||
|
<th class="head text-center"><p>docs by sample</p></th>
|
||||||
|
<th class="head text-center"><p>type</p></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr class="row-even"><td><p>T1A</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>250</p></td>
|
||||||
|
<td class="text-center"><p>vector</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>T1B</p></td>
|
||||||
|
<td class="text-center"><p>28</p></td>
|
||||||
|
<td class="text-center"><p>20000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>vector</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-even"><td><p>T2A</p></td>
|
||||||
|
<td class="text-center"><p>2</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>250</p></td>
|
||||||
|
<td class="text-center"><p>text</p></td>
|
||||||
|
</tr>
|
||||||
|
<tr class="row-odd"><td><p>T2B</p></td>
|
||||||
|
<td class="text-center"><p>28</p></td>
|
||||||
|
<td class="text-center"><p>20000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>5000</p></td>
|
||||||
|
<td class="text-center"><p>1000</p></td>
|
||||||
|
<td class="text-center"><p>text</p></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
<p>For further details on the datasets, we refer to the original
|
||||||
|
<a class="reference external" href="https://ceur-ws.org/Vol-3180/paper-146.pdf">paper</a>:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Esuli</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="p">,</span> <span class="n">Moreo</span><span class="p">,</span> <span class="n">A</span><span class="o">.</span><span class="p">,</span> <span class="n">Sebastiani</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="p">,</span> <span class="o">&</span> <span class="n">Sperduti</span><span class="p">,</span> <span class="n">G</span><span class="o">.</span> <span class="p">(</span><span class="mi">2022</span><span class="p">)</span><span class="o">.</span>
|
||||||
|
<span class="n">A</span> <span class="n">Detailed</span> <span class="n">Overview</span> <span class="n">of</span> <span class="n">LeQua</span><span class="o">@</span> <span class="n">CLEF</span> <span class="mi">2022</span><span class="p">:</span> <span class="n">Learning</span> <span class="n">to</span> <span class="n">Quantify</span><span class="o">.</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
<section id="adding-custom-datasets">
|
||||||
|
<h2>Adding Custom Datasets<a class="headerlink" href="#adding-custom-datasets" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>QuaPy provides data loaders for simple formats dealing with
|
||||||
|
text, following the format:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">class</span><span class="o">-</span><span class="nb">id</span> \<span class="n">t</span> <span class="n">first</span> <span class="n">document</span><span class="s1">'s pre-processed text </span><span class="se">\n</span>
|
||||||
|
<span class="n">class</span><span class="o">-</span><span class="nb">id</span> \<span class="n">t</span> <span class="n">second</span> <span class="n">document</span><span class="s1">'s pre-processed text </span><span class="se">\n</span>
|
||||||
|
<span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>and sparse representations of the form:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">{</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="ow">or</span> <span class="o">+</span><span class="mi">1</span><span class="p">}</span> <span class="n">col</span><span class="p">(</span><span class="nb">int</span><span class="p">):</span><span class="n">val</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span> <span class="n">col</span><span class="p">(</span><span class="nb">int</span><span class="p">):</span><span class="n">val</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span> <span class="o">...</span> \<span class="n">n</span>
|
||||||
|
<span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The code in charge of loading a LabelledCollection is:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="nd">@classmethod</span>
|
||||||
|
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span><span class="n">callable</span><span class="p">):</span>
|
||||||
|
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="o">*</span><span class="n">loader_func</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>indicating that any <em>loader_func</em> (e.g., a user-defined one) which
|
||||||
|
returns valid arguments for initializing a <em>LabelledCollection</em> object can be used
|
||||||
|
to load any collection. In particular, the <em>LabelledCollection</em> receives as
|
||||||
|
arguments the instances (as an iterable) and the labels (as an iterable) and,
|
||||||
|
additionally, the number of classes can be specified (it would otherwise be
|
||||||
|
inferred from the labels, but that requires at least one positive example for
|
||||||
|
all classes to be present in the collection).</p>
|
||||||
|
<p>The same <em>loader_func</em> can be passed to a Dataset, along with two
|
||||||
|
paths, in order to create a training and test pair of <em>LabelledCollection</em>,
|
||||||
|
e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
|
||||||
|
<span class="n">train_path</span> <span class="o">=</span> <span class="s1">'../my_data/train.dat'</span>
|
||||||
|
<span class="n">test_path</span> <span class="o">=</span> <span class="s1">'../my_data/test.dat'</span>
|
||||||
|
|
||||||
|
<span class="k">def</span> <span class="nf">my_custom_loader</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||||
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
|
||||||
|
<span class="o">...</span>
|
||||||
|
<span class="k">return</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span>
|
||||||
|
|
||||||
|
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">my_custom_loader</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<section id="data-processing">
|
||||||
|
<h3>Data Processing<a class="headerlink" href="#data-processing" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>QuaPy implements a number of preprocessing functions in the package <em>qp.data.preprocessing</em>, including:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>text2tfidf</em>: tfidf vectorization</p></li>
|
||||||
|
<li><p><em>reduce_columns</em>: reducing the number of columns based on term frequency</p></li>
|
||||||
|
<li><p><em>standardize</em>: transforms the column values into z-scores (i.e., subtracts the mean and normalizes by the standard deviation, so
|
||||||
|
that the column values have zero mean and unit variance).</p></li>
|
||||||
|
<li><p><em>index</em>: transforms textual tokens into lists of numeric ids</p></li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Datasets</a><ul>
|
||||||
|
<li><a class="reference internal" href="#reviews-datasets">Reviews Datasets</a></li>
|
||||||
|
<li><a class="reference internal" href="#twitter-sentiment-datasets">Twitter Sentiment Datasets</a></li>
|
||||||
|
<li><a class="reference internal" href="#uci-machine-learning">UCI Machine Learning</a><ul>
|
||||||
|
<li><a class="reference internal" href="#issues">Issues:</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li><a class="reference internal" href="#lequa-datasets">LeQua Datasets</a></li>
|
||||||
|
<li><a class="reference internal" href="#adding-custom-datasets">Adding Custom Datasets</a><ul>
|
||||||
|
<li><a class="reference internal" href="#data-processing">Data Processing</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="Installation.html"
|
||||||
|
title="previous chapter">Installation</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="Evaluation.html"
|
||||||
|
title="next chapter">Evaluation</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Datasets.md.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Evaluation.html" title="Evaluation"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Installation.html" title="Installation"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Datasets</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,281 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Evaluation — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="Protocols" href="Protocols.html" />
|
||||||
|
<link rel="prev" title="Datasets" href="Datasets.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Protocols.html" title="Protocols"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Datasets.html" title="Datasets"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Evaluation</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="evaluation">
|
||||||
|
<h1>Evaluation<a class="headerlink" href="#evaluation" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>Quantification is an appealing tool in scenarios of dataset shift,
|
||||||
|
and particularly in scenarios of prior-probability shift.
|
||||||
|
That is, the interest in estimating the class prevalences arises
|
||||||
|
under the belief that those class prevalences might have changed
|
||||||
|
with respect to the ones observed during training.
|
||||||
|
In other words, one could simply return the training prevalence
|
||||||
|
as a predictor of the test prevalence if this change is assumed
|
||||||
|
to be unlikely (as is the case in general scenarios of
|
||||||
|
machine learning governed by the iid assumption).
|
||||||
|
In brief, quantification requires dedicated evaluation protocols,
|
||||||
|
which are implemented in QuaPy and explained here.</p>
|
||||||
|
<section id="error-measures">
|
||||||
|
<h2>Error Measures<a class="headerlink" href="#error-measures" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>The module quapy.error implements the following error measures for quantification:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>mae</em>: mean absolute error</p></li>
|
||||||
|
<li><p><em>mrae</em>: mean relative absolute error</p></li>
|
||||||
|
<li><p><em>mse</em>: mean squared error</p></li>
|
||||||
|
<li><p><em>mkld</em>: mean Kullback-Leibler Divergence</p></li>
|
||||||
|
<li><p><em>mnkld</em>: mean normalized Kullback-Leibler Divergence</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>Functions <em>ae</em>, <em>rae</em>, <em>se</em>, <em>kld</em>, and <em>nkld</em> are also available,
|
||||||
|
which return the individual errors (i.e., without averaging the whole).</p>
|
||||||
|
<p>Some errors of classification are also available:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>acce</em>: accuracy error (1-accuracy)</p></li>
|
||||||
|
<li><p><em>f1e</em>: F-1 score error (1-F1 score)</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>The error functions implement the following interface, e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>in which the first argument is a ndarray containing the true
|
||||||
|
prevalences, and the second argument is another ndarray with
|
||||||
|
the estimations produced by some method.</p>
|
||||||
|
<p>Some error functions, e.g., <em>mrae</em>, <em>mkld</em>, and <em>mnkld</em>, are
|
||||||
|
smoothed for numerical stability. In those cases, there is a
|
||||||
|
third argument, e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">mrae</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span> <span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>indicating the value for the smoothing parameter epsilon.
|
||||||
|
Traditionally, this value is set to 1/(2T) in past literature,
|
||||||
|
with T the sampling size. One could either pass this value
|
||||||
|
to the function each time, or set QuaPy’s environment
|
||||||
|
variable <em>SAMPLE_SIZE</em> once, and omit this argument
|
||||||
|
thereafter (recommended);
|
||||||
|
e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span> <span class="c1"># once for all</span>
|
||||||
|
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">])</span> <span class="c1"># let's assume 3 classes</span>
|
||||||
|
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">,</span> <span class="mf">0.6</span><span class="p">])</span>
|
||||||
|
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mrae</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'mrae(</span><span class="si">{</span><span class="n">true_prev</span><span class="si">}</span><span class="s1">, </span><span class="si">{</span><span class="n">estim_prev</span><span class="si">}</span><span class="s1">) = </span><span class="si">{</span><span class="n">error</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>will print:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mrae</span><span class="p">([</span><span class="mf">0.500</span><span class="p">,</span> <span class="mf">0.300</span><span class="p">,</span> <span class="mf">0.200</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.100</span><span class="p">,</span> <span class="mf">0.300</span><span class="p">,</span> <span class="mf">0.600</span><span class="p">])</span> <span class="o">=</span> <span class="mf">0.914</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Finally, it is possible to instantiate QuaPy’s quantification
|
||||||
|
error functions from strings using, e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">error_function</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="s1">'mse'</span><span class="p">)</span>
|
||||||
|
<span class="n">error</span> <span class="o">=</span> <span class="n">error_function</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
<section id="evaluation-protocols">
|
||||||
|
<h2>Evaluation Protocols<a class="headerlink" href="#evaluation-protocols" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>An <em>evaluation protocol</em> is an evaluation procedure that uses
|
||||||
|
one specific <em>sample generation protocol</em> to generate many
|
||||||
|
samples, typically characterized by widely varying amounts of
|
||||||
|
<em>shift</em> with respect to the original distribution, that are then
|
||||||
|
used to evaluate the performance of a (trained) quantifier.
|
||||||
|
These protocols are explained in more detail in a dedicated <a class="reference internal" href="Protocols.html"><span class="doc std std-doc">entry
|
||||||
|
in the wiki</span></a>. For the time being, let us assume we already have
|
||||||
|
chosen and instantiated one specific such protocol, that we here
|
||||||
|
simply call <em>prot</em>. Let us also assume our model is called
|
||||||
|
<em>quantifier</em> and that our evaluation measure of choice is
|
||||||
|
<em>mae</em>. The evaluation comes down to:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">mae</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">prot</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'MAE = </span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>It is often desirable to evaluate our system using more than one
|
||||||
|
single evaluation measure. In this case, it is convenient to generate
|
||||||
|
a <em>report</em>. A report in QuaPy is a dataframe accounting for all the
|
||||||
|
true prevalence values with their corresponding prevalence values
|
||||||
|
as estimated by the quantifier, along with the error each has given
|
||||||
|
rise to.</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">report</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluation_report</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">prot</span><span class="p">,</span> <span class="n">error_metrics</span><span class="o">=</span><span class="p">[</span><span class="s1">'mae'</span><span class="p">,</span> <span class="s1">'mrae'</span><span class="p">,</span> <span class="s1">'mkld'</span><span class="p">])</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>From a pandas dataframe, it is straightforward to visualize all the results,
|
||||||
|
and compute the averaged values, e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">pd</span><span class="o">.</span><span class="n">set_option</span><span class="p">(</span><span class="s1">'display.expand_frame_repr'</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
|
||||||
|
<span class="n">report</span><span class="p">[</span><span class="s1">'estim-prev'</span><span class="p">]</span> <span class="o">=</span> <span class="n">report</span><span class="p">[</span><span class="s1">'estim-prev'</span><span class="p">]</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="n">report</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="s1">'Averaged values:'</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="n">report</span><span class="o">.</span><span class="n">mean</span><span class="p">())</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>This will produce an output like:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span> <span class="n">true</span><span class="o">-</span><span class="n">prev</span> <span class="n">estim</span><span class="o">-</span><span class="n">prev</span> <span class="n">mae</span> <span class="n">mrae</span> <span class="n">mkld</span>
|
||||||
|
<span class="mi">0</span> <span class="p">[</span><span class="mf">0.308</span><span class="p">,</span> <span class="mf">0.692</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.314</span><span class="p">,</span> <span class="mf">0.686</span><span class="p">]</span> <span class="mf">0.005649</span> <span class="mf">0.013182</span> <span class="mf">0.000074</span>
|
||||||
|
<span class="mi">1</span> <span class="p">[</span><span class="mf">0.896</span><span class="p">,</span> <span class="mf">0.104</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.909</span><span class="p">,</span> <span class="mf">0.091</span><span class="p">]</span> <span class="mf">0.013145</span> <span class="mf">0.069323</span> <span class="mf">0.000985</span>
|
||||||
|
<span class="mi">2</span> <span class="p">[</span><span class="mf">0.848</span><span class="p">,</span> <span class="mf">0.152</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.809</span><span class="p">,</span> <span class="mf">0.191</span><span class="p">]</span> <span class="mf">0.039063</span> <span class="mf">0.149806</span> <span class="mf">0.005175</span>
|
||||||
|
<span class="mi">3</span> <span class="p">[</span><span class="mf">0.016</span><span class="p">,</span> <span class="mf">0.984</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.033</span><span class="p">,</span> <span class="mf">0.967</span><span class="p">]</span> <span class="mf">0.017236</span> <span class="mf">0.487529</span> <span class="mf">0.005298</span>
|
||||||
|
<span class="mi">4</span> <span class="p">[</span><span class="mf">0.728</span><span class="p">,</span> <span class="mf">0.272</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.751</span><span class="p">,</span> <span class="mf">0.249</span><span class="p">]</span> <span class="mf">0.022769</span> <span class="mf">0.057146</span> <span class="mf">0.001350</span>
|
||||||
|
<span class="o">...</span> <span class="o">...</span> <span class="o">...</span> <span class="o">...</span> <span class="o">...</span> <span class="o">...</span>
|
||||||
|
<span class="mi">4995</span> <span class="p">[</span><span class="mf">0.72</span><span class="p">,</span> <span class="mf">0.28</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.698</span><span class="p">,</span> <span class="mf">0.302</span><span class="p">]</span> <span class="mf">0.021752</span> <span class="mf">0.053631</span> <span class="mf">0.001133</span>
|
||||||
|
<span class="mi">4996</span> <span class="p">[</span><span class="mf">0.868</span><span class="p">,</span> <span class="mf">0.132</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.888</span><span class="p">,</span> <span class="mf">0.112</span><span class="p">]</span> <span class="mf">0.020490</span> <span class="mf">0.088230</span> <span class="mf">0.001985</span>
|
||||||
|
<span class="mi">4997</span> <span class="p">[</span><span class="mf">0.292</span><span class="p">,</span> <span class="mf">0.708</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.298</span><span class="p">,</span> <span class="mf">0.702</span><span class="p">]</span> <span class="mf">0.006149</span> <span class="mf">0.014788</span> <span class="mf">0.000090</span>
|
||||||
|
<span class="mi">4998</span> <span class="p">[</span><span class="mf">0.24</span><span class="p">,</span> <span class="mf">0.76</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.220</span><span class="p">,</span> <span class="mf">0.780</span><span class="p">]</span> <span class="mf">0.019950</span> <span class="mf">0.054309</span> <span class="mf">0.001127</span>
|
||||||
|
<span class="mi">4999</span> <span class="p">[</span><span class="mf">0.948</span><span class="p">,</span> <span class="mf">0.052</span><span class="p">]</span> <span class="p">[</span><span class="mf">0.965</span><span class="p">,</span> <span class="mf">0.035</span><span class="p">]</span> <span class="mf">0.016941</span> <span class="mf">0.165776</span> <span class="mf">0.003538</span>
|
||||||
|
|
||||||
|
<span class="p">[</span><span class="mi">5000</span> <span class="n">rows</span> <span class="n">x</span> <span class="mi">5</span> <span class="n">columns</span><span class="p">]</span>
|
||||||
|
<span class="n">Averaged</span> <span class="n">values</span><span class="p">:</span>
|
||||||
|
<span class="n">mae</span> <span class="mf">0.023588</span>
|
||||||
|
<span class="n">mrae</span> <span class="mf">0.108779</span>
|
||||||
|
<span class="n">mkld</span> <span class="mf">0.003631</span>
|
||||||
|
<span class="n">dtype</span><span class="p">:</span> <span class="n">float64</span>
|
||||||
|
|
||||||
|
<span class="n">Process</span> <span class="n">finished</span> <span class="k">with</span> <span class="n">exit</span> <span class="n">code</span> <span class="mi">0</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Alternatively, we can simply generate all the predictions by:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">prot</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>All the evaluation functions implement specific optimizations for speeding-up
|
||||||
|
the evaluation of aggregative quantifiers (i.e., of instances of <em>AggregativeQuantifier</em>).
|
||||||
|
The optimization comes down to generating classification predictions (either crisp or soft)
|
||||||
|
only once for the entire test set, and then applying the sampling procedure to the
|
||||||
|
predictions, instead of generating samples of instances and then computing the
|
||||||
|
classification predictions every time. This is only possible when the protocol
|
||||||
|
is an instance of <em>OnLabelledCollectionProtocol</em>. The optimization is only
|
||||||
|
carried out when the number of classification predictions thus generated would be
|
||||||
|
smaller than the number of predictions required for the entire protocol; e.g.,
|
||||||
|
if the original dataset contains 1M instances, but the protocol is such that it would
|
||||||
|
at most generate 20 samples of 100 instances, then it would be preferable to postpone the
|
||||||
|
classification for each sample. This behaviour is indicated by setting
|
||||||
|
<em>aggr_speedup="auto"</em>. Conversely, when indicating <em>aggr_speedup="force"</em>, QuaPy will
|
||||||
|
precompute all the predictions irrespective of the number of instances and number of samples.
|
||||||
|
Finally, this can be deactivated by setting <em>aggr_speedup=False</em>. Note that this optimization
|
||||||
|
is not only applied for the final evaluation, but also for the internal evaluations carried
|
||||||
|
out during <em>model selection</em>. Since these are typically many, the heuristic can help reduce the
|
||||||
|
execution time a lot.</p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Evaluation</a><ul>
|
||||||
|
<li><a class="reference internal" href="#error-measures">Error Measures</a></li>
|
||||||
|
<li><a class="reference internal" href="#evaluation-protocols">Evaluation Protocols</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="Datasets.html"
|
||||||
|
title="previous chapter">Datasets</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="Protocols.html"
|
||||||
|
title="next chapter">Protocols</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Evaluation.md.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Protocols.html" title="Protocols"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Datasets.html" title="Datasets"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Evaluation</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,178 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Installation — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="Datasets" href="Datasets.html" />
|
||||||
|
<link rel="prev" title="Welcome to QuaPy’s documentation!" href="index.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Datasets.html" title="Datasets"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="index.html" title="Welcome to QuaPy’s documentation!"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Installation</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="installation">
|
||||||
|
<h1>Installation<a class="headerlink" href="#installation" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>QuaPy can be easily installed via <cite>pip</cite></p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">quapy</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>See <a class="reference external" href="https://pypi.org/project/QuaPy/">pip page</a> for older versions.</p>
|
||||||
|
<section id="requirements">
|
||||||
|
<h2>Requirements<a class="headerlink" href="#requirements" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p>scikit-learn, numpy, scipy</p></li>
|
||||||
|
<li><p>pytorch (for QuaNet)</p></li>
|
||||||
|
<li><p>svmperf patched for quantification (see below)</p></li>
|
||||||
|
<li><p>joblib</p></li>
|
||||||
|
<li><p>tqdm</p></li>
|
||||||
|
<li><p>pandas, xlrd</p></li>
|
||||||
|
<li><p>matplotlib</p></li>
|
||||||
|
</ul>
|
||||||
|
</section>
|
||||||
|
<section id="svm-perf-with-quantification-oriented-losses">
|
||||||
|
<h2>SVM-perf with quantification-oriented losses<a class="headerlink" href="#svm-perf-with-quantification-oriented-losses" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>In order to run experiments involving SVM(Q), SVM(KLD), SVM(NKLD),
|
||||||
|
SVM(AE), or SVM(RAE), you have to first download the
|
||||||
|
<a class="reference external" href="http://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html">svmperf</a>
|
||||||
|
package, apply the patch
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch">svm-perf-quantification-ext.patch</a>,
|
||||||
|
and compile the sources.
|
||||||
|
The script
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh">prepare_svmperf.sh</a>,
|
||||||
|
does the whole job. Simply run:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="n">prepare_svmperf</span><span class="o">.</span><span class="n">sh</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The resulting directory <cite>./svm_perf_quantification</cite> contains the
|
||||||
|
patched version of <cite>svmperf</cite> with quantification-oriented losses.</p>
|
||||||
|
<p>The
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch">svm-perf-quantification-ext.patch</a>
|
||||||
|
is an extension of the patch made available by
|
||||||
|
<a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0">Esuli et al. 2015</a>
|
||||||
|
that allows SVMperf to optimize for
|
||||||
|
the <cite>Q</cite> measure as proposed by
|
||||||
|
<a class="reference external" href="https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X">Barranquero et al. 2015</a>
|
||||||
|
and for the <cite>KLD</cite> and <cite>NKLD</cite> as proposed by
|
||||||
|
<a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0">Esuli et al. 2015</a>
|
||||||
|
for quantification.
|
||||||
|
This patch extends the former by also allowing SVMperf to optimize for
|
||||||
|
<cite>AE</cite> and <cite>RAE</cite>.</p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Installation</a><ul>
|
||||||
|
<li><a class="reference internal" href="#requirements">Requirements</a></li>
|
||||||
|
<li><a class="reference internal" href="#svm-perf-with-quantification-oriented-losses">SVM-perf with quantification-oriented losses</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="index.html"
|
||||||
|
title="previous chapter">Welcome to QuaPy’s documentation!</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="Datasets.html"
|
||||||
|
title="next chapter">Datasets</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Installation.rst.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Datasets.html" title="Datasets"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="index.html" title="Welcome to QuaPy’s documentation!"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Installation</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,539 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Quantification Methods — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="Model Selection" href="Model-Selection.html" />
|
||||||
|
<link rel="prev" title="Protocols" href="Protocols.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Model-Selection.html" title="Model Selection"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Protocols.html" title="Protocols"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Quantification Methods</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="quantification-methods">
|
||||||
|
<h1>Quantification Methods<a class="headerlink" href="#quantification-methods" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>Quantification methods can be categorized as belonging to
|
||||||
|
<em>aggregative</em> and <em>non-aggregative</em> groups.
|
||||||
|
Most methods included in QuaPy at the moment are of type <em>aggregative</em>
|
||||||
|
(though we plan to add many more methods in the near future), i.e.,
|
||||||
|
are methods characterized by the fact that
|
||||||
|
quantification is performed as an aggregation function of the individual
|
||||||
|
products of classification.</p>
|
||||||
|
<p>Any quantifier in QuaPy should extend the class <em>BaseQuantifier</em>,
|
||||||
|
and implement some abstract methods:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span> <span class="nd">@abstractmethod</span>
|
||||||
|
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span> <span class="o">...</span>
|
||||||
|
|
||||||
|
<span class="nd">@abstractmethod</span>
|
||||||
|
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span> <span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The meaning of those functions should be familiar to those
|
||||||
|
used to work with scikit-learn since the class structure of QuaPy
|
||||||
|
is directly inspired by scikit-learn’s <em>Estimators</em>. Functions
|
||||||
|
<em>fit</em> and <em>quantify</em> are used to train the model and to provide
|
||||||
|
class estimations (the reason why
|
||||||
|
scikit-learn’s structure has not been adopted <em>as is</em> in QuaPy responds to
|
||||||
|
the fact that scikit-learn’s <em>predict</em> function is expected to return
|
||||||
|
one output for each input element –e.g., a predicted label for each
|
||||||
|
instance in a sample– while in quantification the output for a sample
|
||||||
|
is one single array of class prevalences).
|
||||||
|
Quantifiers also extend from scikit-learn’s <code class="docutils literal notranslate"><span class="pre">BaseEstimator</span></code>, in order
|
||||||
|
to simplify the use of <em>set_params</em> and <em>get_params</em> used in
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection">model selector</a>.</p>
|
||||||
|
<section id="aggregative-methods">
|
||||||
|
<h2>Aggregative Methods<a class="headerlink" href="#aggregative-methods" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>All quantification methods are implemented as part of the
|
||||||
|
<em>qp.method</em> package. In particular, <em>aggregative</em> methods are defined in
|
||||||
|
<em>qp.method.aggregative</em>, and extend <em>AggregativeQuantifier(BaseQuantifier)</em>.
|
||||||
|
The methods that any <em>aggregative</em> quantifier must implement are:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span> <span class="nd">@abstractmethod</span>
|
||||||
|
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_learner</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span> <span class="o">...</span>
|
||||||
|
|
||||||
|
<span class="nd">@abstractmethod</span>
|
||||||
|
<span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span> <span class="o">...</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>since, as mentioned before, aggregative methods base their prediction on the
|
||||||
|
individual predictions of a classifier. Indeed, a default implementation
|
||||||
|
of <em>BaseQuantifier.quantify</em> is already provided by <em>AggregativeQuantifier</em>, which looks like:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||||
|
<span class="n">classif_predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Aggregative quantifiers are expected to maintain a classifier (which is
|
||||||
|
accessed through the <em>@property</em> <em>classifier</em>). This classifier is
|
||||||
|
given as input to the quantifier, and can be already fit
|
||||||
|
on external data (in which case, the <em>fit_learner</em> argument should
|
||||||
|
be set to False), or be fit by the quantifier’s fit (default).</p>
|
||||||
|
<p>Another class of <em>aggregative</em> methods are the <em>probabilistic</em>
|
||||||
|
aggregative methods, that should inherit from the abstract class
|
||||||
|
<em>AggregativeProbabilisticQuantifier(AggregativeQuantifier)</em>.
|
||||||
|
The particularity of <em>probabilistic</em> aggregative methods (w.r.t.
|
||||||
|
non-probabilistic ones), is that the default quantifier is defined
|
||||||
|
in terms of the posterior probabilities returned by a probabilistic
|
||||||
|
classifier, and not by the crisp decisions of a hard classifier.
|
||||||
|
In any case, the interface <em>classify(instances)</em> remains unchanged.</p>
|
||||||
|
<p>One advantage of <em>aggregative</em> methods (either probabilistic or not)
|
||||||
|
is that the evaluation according to any sampling procedure (e.g.,
|
||||||
|
the <a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation">artificial sampling protocol</a>)
|
||||||
|
can be achieved very efficiently, since the entire set can be pre-classified
|
||||||
|
once, and the quantification estimations for different samples can directly
|
||||||
|
reuse these predictions, without having to classify each element every time.
|
||||||
|
QuaPy leverages this property to speed-up any procedure having to do with
|
||||||
|
quantification over samples, as is customarily done in model selection or
|
||||||
|
in evaluation.</p>
|
||||||
|
<section id="the-classify-count-variants">
|
||||||
|
<h3>The Classify & Count variants<a class="headerlink" href="#the-classify-count-variants" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>QuaPy implements the four CC variants, i.e.:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>CC</em> (Classify & Count), the simplest aggregative quantifier; one that
|
||||||
|
simply relies on the label predictions of a classifier to deliver class estimates.</p></li>
|
||||||
|
<li><p><em>ACC</em> (Adjusted Classify & Count), the adjusted variant of CC.</p></li>
|
||||||
|
<li><p><em>PCC</em> (Probabilistic Classify & Count), the probabilistic variant of CC that
|
||||||
|
relies on the soft estimations (or posterior probabilities) returned by a (probabilistic) classifier.</p></li>
|
||||||
|
<li><p><em>PACC</em> (Probabilistic Adjusted Classify & Count), the adjusted variant of PCC.</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>The following code serves as a complete example using CC equipped
|
||||||
|
with an SVM as the classifier:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">LinearSVC</span>
|
||||||
|
|
||||||
|
<span class="n">training</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'hcr'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||||
|
|
||||||
|
<span class="c1"># instantiate a classifier learner, in this case a SVM</span>
|
||||||
|
<span class="n">svm</span> <span class="o">=</span> <span class="n">LinearSVC</span><span class="p">()</span>
|
||||||
|
|
||||||
|
<span class="c1"># instantiate a Classify & Count with the SVM</span>
|
||||||
|
<span class="c1"># (an alias is available in qp.method.aggregative.ClassifyAndCount)</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">CC</span><span class="p">(</span><span class="n">svm</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The same code could be used to instantiate an ACC, by simply replacing
|
||||||
|
the instantiation of the model with:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ACC</span><span class="p">(</span><span class="n">svm</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Note that the adjusted variants (ACC and PACC) need to estimate
|
||||||
|
some parameters for performing the adjustment (e.g., the
|
||||||
|
<em>true positive rate</em> and the <em>false positive rate</em> in case of
|
||||||
|
binary classification) that are estimated on a validation split
|
||||||
|
of the labelled set. In this case, the <em>__init__</em> method of
|
||||||
|
ACC defines an additional parameter, <em>val_split</em> which, by
|
||||||
|
default, is set to 0.4, so that 40% of the labelled data
|
||||||
|
will be used for estimating the parameters for adjusting the
|
||||||
|
predictions. This parameter can also be set to an integer,
|
||||||
|
indicating that the parameters should be estimated by means of
|
||||||
|
<em>k</em>-fold cross-validation, for which the integer indicates the
|
||||||
|
number <em>k</em> of folds. Finally, <em>val_split</em> can be set to a
|
||||||
|
specific held-out validation set (i.e., an instance of <em>LabelledCollection</em>).</p>
|
||||||
|
<p>The specification of <em>val_split</em> can be
|
||||||
|
postponed to the invocation of the fit method (if <em>val_split</em> was also
|
||||||
|
set in the constructor, the one specified at fit time would prevail),
|
||||||
|
e.g.:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ACC</span><span class="p">(</span><span class="n">svm</span><span class="p">)</span>
|
||||||
|
<span class="c1"># perform 5-fold cross validation for estimating ACC's parameters</span>
|
||||||
|
<span class="c1"># (overrides the default val_split=0.4 in the constructor)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The following code illustrates the case in which PCC is used:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">PCC</span><span class="p">(</span><span class="n">svm</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="s1">'classifier:'</span><span class="p">,</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>In this case, QuaPy will print:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">The</span> <span class="n">learner</span> <span class="n">LinearSVC</span> <span class="n">does</span> <span class="ow">not</span> <span class="n">seem</span> <span class="n">to</span> <span class="n">be</span> <span class="n">probabilistic</span><span class="o">.</span> <span class="n">The</span> <span class="n">learner</span> <span class="n">will</span> <span class="n">be</span> <span class="n">calibrated</span><span class="o">.</span>
|
||||||
|
<span class="n">classifier</span><span class="p">:</span> <span class="n">CalibratedClassifierCV</span><span class="p">(</span><span class="n">base_estimator</span><span class="o">=</span><span class="n">LinearSVC</span><span class="p">(),</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The first output indicates that the learner (<em>LinearSVC</em> in this case)
|
||||||
|
is not a probabilistic classifier (i.e., it does not implement the
|
||||||
|
<em>predict_proba</em> method) and so, the classifier will be converted to
|
||||||
|
a probabilistic one through <a class="reference external" href="https://scikit-learn.org/stable/modules/calibration.html">calibration</a>.
|
||||||
|
As a result, the classifier that is printed in the second line points
|
||||||
|
to a <em>CalibratedClassifierCV</em> instance. Note that calibration can only
|
||||||
|
be applied to hard classifiers when <em>fit_learner=True</em>; an exception
|
||||||
|
will be raised otherwise.</p>
|
||||||
|
<p>Lastly, everything we said about ACC and PCC
|
||||||
|
applies to PACC as well.</p>
|
||||||
|
</section>
|
||||||
|
<section id="expectation-maximization-emq">
|
||||||
|
<h3>Expectation Maximization (EMQ)<a class="headerlink" href="#expectation-maximization-emq" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>The Expectation Maximization Quantifier (EMQ), also known as
|
||||||
|
the SLD, is available at <em>qp.method.aggregative.EMQ</em> or via the
|
||||||
|
alias <em>qp.method.aggregative.ExpectationMaximizationQuantifier</em>.
|
||||||
|
The method is described in:</p>
|
||||||
|
<p><em>Saerens, M., Latinne, P., and Decaestecker, C. (2002). Adjusting the outputs of a classifier
|
||||||
|
to new a priori probabilities: A simple procedure. Neural Computation, 14(1):21–41.</em></p>
|
||||||
|
<p>EMQ works with a probabilistic classifier (if the classifier
|
||||||
|
given as input is a hard one, a calibration will be attempted).
|
||||||
|
Although this method was originally proposed for improving the
|
||||||
|
posterior probabilities of a probabilistic classifier, and not
|
||||||
|
for improving the estimation of prior probabilities, EMQ ranks
|
||||||
|
almost always among the most effective quantifiers in the
|
||||||
|
experiments we have carried out.</p>
|
||||||
|
<p>An example of use can be found below:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||||
|
|
||||||
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'hcr'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">EMQ</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p><em>New in v0.1.7</em>: EMQ now accepts two new parameters in the construction method, namely
|
||||||
|
<em>exact_train_prev</em>, which allows one to use the true training prevalence as the initial
|
||||||
|
prevalence estimation (default behaviour), or instead an approximation of it as
|
||||||
|
suggested by <a class="reference external" href="http://proceedings.mlr.press/v119/alexandari20a.html">Alexandari et al. (2020)</a>
|
||||||
|
(by setting <em>exact_train_prev=False</em>).
|
||||||
|
The other parameter is <em>recalib</em>, which allows one to indicate a calibration method among those
|
||||||
|
proposed by <a class="reference external" href="http://proceedings.mlr.press/v119/alexandari20a.html">Alexandari et al. (2020)</a>,
|
||||||
|
including the Bias-Corrected Temperature Scaling, Vector Scaling, etc.
|
||||||
|
See the API documentation for further details.</p>
|
||||||
|
</section>
|
||||||
|
<section id="hellinger-distance-y-hdy">
|
||||||
|
<h3>Hellinger Distance y (HDy)<a class="headerlink" href="#hellinger-distance-y-hdy" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>Implementation of the method based on the Hellinger Distance y (HDy) proposed by
|
||||||
|
<a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S0020025512004069">González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution
|
||||||
|
estimation based on the Hellinger distance. Information Sciences, 218:146–164.</a></p>
|
||||||
|
<p>It is implemented in <em>qp.method.aggregative.HDy</em> (also accessible
|
||||||
|
through the alias <em>qp.method.aggregative.HellingerDistanceY</em>).
|
||||||
|
This method works with a probabilistic classifier (hard classifiers
|
||||||
|
can be used as well and will be calibrated) and requires a validation
|
||||||
|
set to estimate the parameters of the mixture model. Just like
|
||||||
|
ACC and PACC, this quantifier receives a <em>val_split</em> argument
|
||||||
|
in the constructor (or in the fit method, in which case the previous
|
||||||
|
value is overridden) that can either be a float indicating the proportion
|
||||||
|
of training data to be taken as the validation set (in a random
|
||||||
|
stratified split), or a validation set (i.e., an instance of
|
||||||
|
<em>LabelledCollection</em>) itself.</p>
|
||||||
|
<p>HDy was proposed as a binary quantifier and the implementation
|
||||||
|
provided in QuaPy accepts only binary datasets.</p>
|
||||||
|
<p>The following code shows an example of use:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||||
|
|
||||||
|
<span class="c1"># load a binary dataset</span>
|
||||||
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'hp'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">HDy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p><em>New in v0.1.7:</em> QuaPy now provides an implementation of the generalized
|
||||||
|
“Distribution Matching” approaches for multiclass, inspired by the framework
|
||||||
|
of <a class="reference external" href="https://arxiv.org/abs/1606.00868">Firat (2016)</a>. One can instantiate
|
||||||
|
a variant of HDy for multiclass quantification as follows:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">mutliclassHDy</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">DistributionMatching</span><span class="p">(</span><span class="n">classifier</span><span class="o">=</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">divergence</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p><em>New in v0.1.7:</em> QuaPy now provides an implementation of the “DyS”
|
||||||
|
framework proposed by <a class="reference external" href="https://ojs.aaai.org/index.php/AAAI/article/view/4376">Maletzke et al (2020)</a>
|
||||||
|
and the “SMM” method proposed by <a class="reference external" href="https://ieeexplore.ieee.org/document/9260028">Hassan et al (2019)</a>
|
||||||
|
(thanks to <em>Pablo González</em> for the contributions!)</p>
|
||||||
|
</section>
|
||||||
|
<section id="threshold-optimization-methods">
|
||||||
|
<h3>Threshold Optimization methods<a class="headerlink" href="#threshold-optimization-methods" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p><em>New in v0.1.7:</em> QuaPy now implements Forman’s threshold optimization methods;
|
||||||
|
see, e.g., <a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/1150402.1150423">(Forman 2006)</a>
|
||||||
|
and <a class="reference external" href="https://link.springer.com/article/10.1007/s10618-008-0097-y">(Forman 2008)</a>.
|
||||||
|
These include: T50, MAX, X, Median Sweep (MS), and its variant MS2.</p>
|
||||||
|
</section>
|
||||||
|
<section id="explicit-loss-minimization">
|
||||||
|
<h3>Explicit Loss Minimization<a class="headerlink" href="#explicit-loss-minimization" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>Explicit Loss Minimization (ELM) represents a family of methods
|
||||||
|
based on structured output learning, i.e., quantifiers relying on
|
||||||
|
classifiers that have been optimized targeting a
|
||||||
|
quantification-oriented evaluation measure.
|
||||||
|
The original methods are implemented in QuaPy as classify & count (CC)
|
||||||
|
quantifiers that use Joachims’ <a class="reference external" href="https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html">SVMperf</a>
|
||||||
|
as the underlying classifier, properly set to optimize for the desired loss.</p>
|
||||||
|
<p>In QuaPy, this can be achieved more easily by calling the following functions:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>newSVMQ</em>: returns the quantification method called SVM(Q) that optimizes for the metric <em>Q</em> defined
|
||||||
|
in <a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S003132031400291X"><em>Barranquero, J., Díez, J., and del Coz, J. J. (2015). Quantification-oriented learning based
|
||||||
|
on reliable classifiers. Pattern Recognition, 48(2):591–604.</em></a></p></li>
|
||||||
|
<li><p><em>newSVMKLD</em> and <em>newSVMNKLD</em>: return the quantification methods called SVM(KLD) and SVM(nKLD), standing for
|
||||||
|
Kullback-Leibler Divergence and Normalized Kullback-Leibler Divergence, as proposed in <a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/2700406"><em>Esuli, A. and Sebastiani, F. (2015).
|
||||||
|
Optimizing text quantifiers for multivariate loss functions.
|
||||||
|
ACM Transactions on Knowledge Discovery from Data, 9(4):Article 27.</em></a></p></li>
|
||||||
|
<li><p><em>newSVMAE</em> and <em>newSVMRAE</em>: return the quantification methods called SVM(AE) and SVM(RAE), which optimize for the (Mean) Absolute Error and for the
|
||||||
|
(Mean) Relative Absolute Error, as first used by
|
||||||
|
<a class="reference external" href="https://arxiv.org/abs/2011.02552"><em>Moreo, A. and Sebastiani, F. (2021). Tweet sentiment quantification: An experimental re-evaluation. PLOS ONE 17 (9), 1-23.</em></a></p></li>
|
||||||
|
</ul>
|
||||||
|
<p>The last two methods (SVM(AE) and SVM(RAE)) have been implemented in
|
||||||
|
QuaPy in order to make available ELM variants for what nowadays
|
||||||
|
are considered the most well-behaved evaluation metrics in quantification.</p>
|
||||||
|
<p>In order to make these models work, you would need to run the script
|
||||||
|
<em>prepare_svmperf.sh</em> (distributed along with QuaPy) that
|
||||||
|
downloads <em>SVMperf</em>’s source code, applies a patch that
|
||||||
|
implements the quantification-oriented losses, and compiles the
|
||||||
|
sources.</p>
|
||||||
|
<p>If you want to add any custom loss, you would need to modify
|
||||||
|
the source code of <em>SVMperf</em> in order to implement it, and
|
||||||
|
assign a valid loss code to it. Then you must re-compile
|
||||||
|
the whole thing and instantiate the quantifier in QuaPy
|
||||||
|
as follows:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># you can either set the path to your custom svm_perf_quantification implementation</span>
|
||||||
|
<span class="c1"># in the environment variable, or as an argument to the constructor of ELM</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SVMPERF_HOME'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'./path/to/svm_perf_quantification'</span>
|
||||||
|
|
||||||
|
<span class="c1"># assign an alias to your custom loss and the id you have assigned to it</span>
|
||||||
|
<span class="n">svmperf</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">svmperf</span><span class="o">.</span><span class="n">SVMperf</span>
|
||||||
|
<span class="n">svmperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">[</span><span class="s1">'mycustomloss'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">28</span>
|
||||||
|
|
||||||
|
<span class="c1"># instantiate the ELM method indicating the loss</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ELM</span><span class="p">(</span><span class="n">loss</span><span class="o">=</span><span class="s1">'mycustomloss'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>All ELM variants are binary quantifiers since they rely on <em>SVMperf</em>, which
|
||||||
|
currently supports only binary classification.
|
||||||
|
ELM variants (any binary quantifier in general) can be extended
|
||||||
|
to operate in single-label scenarios trivially by adopting a
|
||||||
|
“one-vs-all” strategy (as, e.g., in
|
||||||
|
<a class="reference external" href="https://link.springer.com/article/10.1007/s13278-016-0327-z"><em>Gao, W. and Sebastiani, F. (2016). From classification to quantification in tweet sentiment
|
||||||
|
analysis. Social Network Analysis and Mining, 6(19):1–22</em></a>).
|
||||||
|
In QuaPy this is possible by using the <em>OneVsAll</em> class.</p>
|
||||||
|
<p>There are two ways for instantiating this class, <em>OneVsAllGeneric</em> that works for
|
||||||
|
any quantifier, and <em>OneVsAllAggregative</em> that is optimized for aggregative quantifiers.
|
||||||
|
In general, you can simply use the <em>getOneVsAll</em> function and QuaPy will choose
|
||||||
|
the more convenient of the two.</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">SVMQ</span>
|
||||||
|
|
||||||
|
<span class="c1"># load a single-label dataset (this one contains 3 classes)</span>
|
||||||
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'hcr'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="c1"># let qp know where svmperf is</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SVMPERF_HOME'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'../svm_perf_quantification'</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">getOneVsAll</span><span class="p">(</span><span class="n">SVMQ</span><span class="p">(),</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># run them in parallel</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Check the examples <em><span class="xref myst">explicit_loss_minimization.py</span></em>
|
||||||
|
and <span class="xref myst">one_vs_all.py</span> for more details.</p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
<section id="meta-models">
|
||||||
|
<h2>Meta Models<a class="headerlink" href="#meta-models" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>By <em>meta</em> models we mean quantification methods that are defined on top of other
|
||||||
|
quantification methods, and that thus do not squarely belong to the aggregative nor
|
||||||
|
the non-aggregative group (indeed, <em>meta</em> models could use quantifiers from any of those
|
||||||
|
groups).
|
||||||
|
<em>Meta</em> models are implemented in the <em>qp.method.meta</em> module.</p>
|
||||||
|
<section id="ensembles">
|
||||||
|
<h3>Ensembles<a class="headerlink" href="#ensembles" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>QuaPy implements (some of) the variants proposed in:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253516300628"><em>Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||||||
|
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
|
||||||
|
Information Fusion, 34, 87-100.</em></a></p></li>
|
||||||
|
<li><p><a class="reference external" href="https://www.sciencedirect.com/science/article/pii/S1566253517303652"><em>Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
|
||||||
|
Dynamic ensemble selection for quantification tasks.
|
||||||
|
Information Fusion, 45, 1-15.</em></a></p></li>
|
||||||
|
</ul>
|
||||||
|
<p>The following code shows how to instantiate an Ensemble of 30 <em>Adjusted Classify & Count</em> (ACC)
|
||||||
|
quantifiers operating with a <em>Logistic Regressor</em> (LR) as the base classifier, and using the
|
||||||
|
<em>average</em> as the aggregation policy (see the original article for further details).
|
||||||
|
The last parameter indicates to use all processors for parallelization.</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">ACC</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">Ensemble</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||||
|
|
||||||
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIDataset</span><span class="p">(</span><span class="s1">'haberman'</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">quantifier</span><span class="o">=</span><span class="n">ACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">()),</span> <span class="n">size</span><span class="o">=</span><span class="mi">30</span><span class="p">,</span> <span class="n">policy</span><span class="o">=</span><span class="s1">'ave'</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>Other aggregation policies implemented in QuaPy include:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p>‘ptr’ for applying a dynamic selection based on the training prevalence of the ensemble’s members</p></li>
|
||||||
|
<li><p>‘ds’ for applying a dynamic selection based on the Hellinger Distance</p></li>
|
||||||
|
<li><p><em>any valid quantification measure</em> (e.g., ‘mse’) for performing a static selection based on
|
||||||
|
the performance estimated for each member of the ensemble in terms of that evaluation metric.</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>When using any of the above options, it is important to set the <em>red_size</em> parameter, which
|
||||||
|
specifies the number of members to retain.</p>
|
||||||
|
<p>Please, check the <a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection">model selection</a>
|
||||||
|
wiki if you want to optimize the hyperparameters of the ensemble for classification or quantification.</p>
|
||||||
|
</section>
|
||||||
|
<section id="the-quanet-neural-network">
|
||||||
|
<h3>The QuaNet neural network<a class="headerlink" href="#the-quanet-neural-network" title="Permalink to this heading">¶</a></h3>
|
||||||
|
<p>QuaPy offers an implementation of QuaNet, a deep learning model presented in:</p>
|
||||||
|
<p><a class="reference external" href="https://dl.acm.org/doi/abs/10.1145/3269206.3269287"><em>Esuli, A., Moreo, A., & Sebastiani, F. (2018, October).
|
||||||
|
A recurrent neural network for sentiment quantification.
|
||||||
|
In Proceedings of the 27th ACM International Conference on
|
||||||
|
Information and Knowledge Management (pp. 1775-1778).</em></a></p>
|
||||||
|
<p>This model requires <em>torch</em> to be installed.
|
||||||
|
QuaNet also requires a classifier that can provide embedded representations
|
||||||
|
of the inputs.
|
||||||
|
In the original paper, QuaNet was tested using an LSTM as the base classifier.
|
||||||
|
In the following example, we show an instantiation of QuaNet that instead uses a CNN as a probabilistic classifier, taking its last layer representation as the document embedding:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">QuaNet</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">NeuralClassifierTrainer</span><span class="p">,</span> <span class="n">CNNnet</span>
|
||||||
|
|
||||||
|
<span class="c1"># use samples of 100 elements</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
||||||
|
|
||||||
|
<span class="c1"># load the kindle dataset as text, and convert words to numerical indexes</span>
|
||||||
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="c1"># the text classifier is a CNN trained by NeuralClassifierTrainer</span>
|
||||||
|
<span class="n">cnn</span> <span class="o">=</span> <span class="n">CNNnet</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
||||||
|
<span class="n">learner</span> <span class="o">=</span> <span class="n">NeuralClassifierTrainer</span><span class="p">(</span><span class="n">cnn</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="c1"># train QuaNet</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">QuaNet</span><span class="p">(</span><span class="n">learner</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevalence</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Quantification Methods</a><ul>
|
||||||
|
<li><a class="reference internal" href="#aggregative-methods">Aggregative Methods</a><ul>
|
||||||
|
<li><a class="reference internal" href="#the-classify-count-variants">The Classify & Count variants</a></li>
|
||||||
|
<li><a class="reference internal" href="#expectation-maximization-emq">Expectation Maximization (EMQ)</a></li>
|
||||||
|
<li><a class="reference internal" href="#hellinger-distance-y-hdy">Hellinger Distance y (HDy)</a></li>
|
||||||
|
<li><a class="reference internal" href="#threshold-optimization-methods">Threshold Optimization methods</a></li>
|
||||||
|
<li><a class="reference internal" href="#explicit-loss-minimization">Explicit Loss Minimization</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li><a class="reference internal" href="#meta-models">Meta Models</a><ul>
|
||||||
|
<li><a class="reference internal" href="#ensembles">Ensembles</a></li>
|
||||||
|
<li><a class="reference internal" href="#the-quanet-neural-network">The QuaNet neural network</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="Protocols.html"
|
||||||
|
title="previous chapter">Protocols</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="Model-Selection.html"
|
||||||
|
title="next chapter">Model Selection</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Methods.md.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Model-Selection.html" title="Model Selection"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Protocols.html" title="Protocols"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Quantification Methods</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,268 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Model Selection — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="Plotting" href="Plotting.html" />
|
||||||
|
<link rel="prev" title="Quantification Methods" href="Methods.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Plotting.html" title="Plotting"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Methods.html" title="Quantification Methods"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Model Selection</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="model-selection">
|
||||||
|
<h1>Model Selection<a class="headerlink" href="#model-selection" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>As a supervised machine learning task, quantification methods
|
||||||
|
can strongly depend on a good choice of model hyper-parameters.
|
||||||
|
The process whereby those hyper-parameters are chosen is
|
||||||
|
typically known as <em>Model Selection</em>, and usually consists of
|
||||||
|
testing different settings and picking the one that performed
|
||||||
|
best in a held-out validation set in terms of any given
|
||||||
|
evaluation measure.</p>
|
||||||
|
<section id="targeting-a-quantification-oriented-loss">
|
||||||
|
<h2>Targeting a Quantification-oriented loss<a class="headerlink" href="#targeting-a-quantification-oriented-loss" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>The task being optimized determines the evaluation protocol,
|
||||||
|
i.e., the criteria according to which the performance of
|
||||||
|
any given method for solving it is to be assessed.
|
||||||
|
As a task in its own right, quantification should impose
|
||||||
|
its own model selection strategies, i.e., strategies
|
||||||
|
aimed at finding appropriate configurations
|
||||||
|
specifically designed for the task of quantification.</p>
|
||||||
|
<p>Quantification has long been regarded as an add-on of
|
||||||
|
classification, and thus the model selection strategies
|
||||||
|
customarily adopted in classification have simply been
|
||||||
|
applied to quantification (see the next section).
|
||||||
|
It has been argued in <a class="reference external" href="https://link.springer.com/chapter/10.1007/978-3-030-72240-1_6">Moreo, Alejandro, and Fabrizio Sebastiani.
|
||||||
|
Re-Assessing the “Classify and Count” Quantification Method.
|
||||||
|
ECIR 2021: Advances in Information Retrieval pp 75–91.</a>
|
||||||
|
that specific model selection strategies should
|
||||||
|
be adopted for quantification. That is, model selection
|
||||||
|
strategies for quantification should target
|
||||||
|
quantification-oriented losses and be tested in a variety
|
||||||
|
of scenarios exhibiting different degrees of prior
|
||||||
|
probability shift.</p>
|
||||||
|
<p>The class <em>qp.model_selection.GridSearchQ</em> implements a grid-search exploration over the space of
|
||||||
|
hyper-parameter combinations that <a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation">evaluates</a>
|
||||||
|
each combination of hyper-parameters by means of a given quantification-oriented
|
||||||
|
error metric (e.g., any of the error functions implemented
|
||||||
|
in <em>qp.error</em>) and according to a
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Protocols">sampling generation protocol</a>.</p>
|
||||||
|
<p>The following is an example (also included in the examples folder) of model selection for quantification:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">DistributionMatching</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||||
|
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||||
|
|
||||||
|
<span class="sd">"""</span>
|
||||||
|
<span class="sd">In this example, we show how to perform model selection on a DistributionMatching quantifier.</span>
|
||||||
|
<span class="sd">"""</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">DistributionMatching</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||||
|
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'N_JOBS'</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># explore hyper-parameters in parallel</span>
|
||||||
|
|
||||||
|
<span class="n">training</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||||
|
|
||||||
|
<span class="c1"># The model will be returned by the fit method of GridSearchQ.</span>
|
||||||
|
<span class="c1"># Every combination of hyper-parameters will be evaluated by confronting the</span>
|
||||||
|
<span class="c1"># quantifier thus configured against a series of samples generated by means</span>
|
||||||
|
<span class="c1"># of a sample generation protocol. For this example, we will use the</span>
|
||||||
|
<span class="c1"># artificial-prevalence protocol (APP), that generates samples with prevalence</span>
|
||||||
|
<span class="c1"># values in the entire range of values from a grid (e.g., [0, 0.1, 0.2, ..., 1]).</span>
|
||||||
|
<span class="c1"># We devote 30% of the dataset to this exploration.</span>
|
||||||
|
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
|
||||||
|
<span class="n">protocol</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="c1"># We will explore a classification-dependent hyper-parameter (e.g., the 'C'</span>
|
||||||
|
<span class="c1"># hyper-parameter of LogisticRegression) and a quantification-dependent hyper-parameter</span>
|
||||||
|
<span class="c1"># (e.g., the number of bins in a DistributionMatching quantifier).</span>
|
||||||
|
<span class="c1"># Classifier-dependent hyper-parameters have to be marked with a prefix "classifier__"</span>
|
||||||
|
<span class="c1"># in order to let the quantifier know this hyper-parameter belongs to its underlying</span>
|
||||||
|
<span class="c1"># classifier.</span>
|
||||||
|
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span>
|
||||||
|
<span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">),</span>
|
||||||
|
<span class="s1">'nbins'</span><span class="p">:</span> <span class="p">[</span><span class="mi">8</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">64</span><span class="p">],</span>
|
||||||
|
<span class="p">}</span>
|
||||||
|
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">GridSearchQ</span><span class="p">(</span>
|
||||||
|
<span class="n">model</span><span class="o">=</span><span class="n">model</span><span class="p">,</span>
|
||||||
|
<span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span>
|
||||||
|
<span class="n">protocol</span><span class="o">=</span><span class="n">protocol</span><span class="p">,</span>
|
||||||
|
<span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="c1"># the error to optimize is the MAE (a quantification-oriented loss)</span>
|
||||||
|
<span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="c1"># retrain on the whole labelled set once done</span>
|
||||||
|
<span class="n">verbose</span><span class="o">=</span><span class="kc">True</span> <span class="c1"># show information as the process goes on</span>
|
||||||
|
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'model selection ended: best hyper-parameters=</span><span class="si">{</span><span class="n">model</span><span class="o">.</span><span class="n">best_params_</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">best_model_</span>
|
||||||
|
|
||||||
|
<span class="c1"># evaluation in terms of MAE</span>
|
||||||
|
<span class="c1"># we use the same evaluation protocol (APP) on the test set</span>
|
||||||
|
<span class="n">mae_score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">),</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'MAE=</span><span class="si">{</span><span class="n">mae_score</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>In this example, the system outputs:</p>
|
||||||
|
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">starting</span> <span class="n">model</span> <span class="n">selection</span> <span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=-</span><span class="mi">1</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">0.01</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">64</span><span class="p">}</span> <span class="n">got</span> <span class="n">mae</span> <span class="n">score</span> <span class="mf">0.04021</span> <span class="p">[</span><span class="n">took</span> <span class="mf">1.1356</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">0.01</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">32</span><span class="p">}</span> <span class="n">got</span> <span class="n">mae</span> <span class="n">score</span> <span class="mf">0.04286</span> <span class="p">[</span><span class="n">took</span> <span class="mf">1.2139</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">0.01</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">16</span><span class="p">}</span> <span class="n">got</span> <span class="n">mae</span> <span class="n">score</span> <span class="mf">0.04888</span> <span class="p">[</span><span class="n">took</span> <span class="mf">1.2491</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">0.001</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">8</span><span class="p">}</span> <span class="n">got</span> <span class="n">mae</span> <span class="n">score</span> <span class="mf">0.05163</span> <span class="p">[</span><span class="n">took</span> <span class="mf">1.5372</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="o">...</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">1000.0</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">32</span><span class="p">}</span> <span class="n">got</span> <span class="n">mae</span> <span class="n">score</span> <span class="mf">0.02445</span> <span class="p">[</span><span class="n">took</span> <span class="mf">2.9056</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">optimization</span> <span class="n">finished</span><span class="p">:</span> <span class="n">best</span> <span class="n">params</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">100.0</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">32</span><span class="p">}</span> <span class="p">(</span><span class="n">score</span><span class="o">=</span><span class="mf">0.02234</span><span class="p">)</span> <span class="p">[</span><span class="n">took</span> <span class="mf">7.3114</span><span class="n">s</span><span class="p">]</span>
|
||||||
|
<span class="p">[</span><span class="n">GridSearchQ</span><span class="p">]:</span> <span class="n">refitting</span> <span class="n">on</span> <span class="n">the</span> <span class="n">whole</span> <span class="n">development</span> <span class="nb">set</span>
|
||||||
|
<span class="n">model</span> <span class="n">selection</span> <span class="n">ended</span><span class="p">:</span> <span class="n">best</span> <span class="n">hyper</span><span class="o">-</span><span class="n">parameters</span><span class="o">=</span><span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="mf">100.0</span><span class="p">,</span> <span class="s1">'nbins'</span><span class="p">:</span> <span class="mi">32</span><span class="p">}</span>
|
||||||
|
<span class="n">MAE</span><span class="o">=</span><span class="mf">0.03102</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>The parameter <em>val_split</em> can alternatively be used to indicate
|
||||||
|
a validation set (i.e., an instance of <em>LabelledCollection</em>) instead
|
||||||
|
of a proportion. This could be useful if one wants to have control
|
||||||
|
on the specific data split to be used across different model selection
|
||||||
|
experiments.</p>
|
||||||
|
</section>
|
||||||
|
<section id="targeting-a-classification-oriented-loss">
|
||||||
|
<h2>Targeting a Classification-oriented loss<a class="headerlink" href="#targeting-a-classification-oriented-loss" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>Optimizing a model for quantification can be rather
|
||||||
|
computationally costly.
|
||||||
|
In aggregative methods, one could alternatively try to optimize
|
||||||
|
the classifier’s hyper-parameters for classification.
|
||||||
|
Although this is theoretically suboptimal, many articles in
|
||||||
|
the quantification literature have opted for this strategy.</p>
|
||||||
|
<p>In QuaPy, this is achieved by simply instantiating the
|
||||||
|
classifier learner as a GridSearchCV from scikit-learn.
|
||||||
|
The following code illustrates how to do that:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">learner</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span>
|
||||||
|
<span class="n">LogisticRegression</span><span class="p">(),</span>
|
||||||
|
<span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">),</span> <span class="s1">'class_weight'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'balanced'</span><span class="p">,</span> <span class="kc">None</span><span class="p">]},</span>
|
||||||
|
<span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">DistributionMatching</span><span class="p">(</span><span class="n">learner</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>However, this is conceptually flawed, since the model should be
|
||||||
|
optimized for the task at hand (quantification), and not for a surrogate task (classification),
|
||||||
|
i.e., the model should be requested to deliver low quantification errors, rather
|
||||||
|
than low classification errors.</p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Model Selection</a><ul>
|
||||||
|
<li><a class="reference internal" href="#targeting-a-quantification-oriented-loss">Targeting a Quantification-oriented loss</a></li>
|
||||||
|
<li><a class="reference internal" href="#targeting-a-classification-oriented-loss">Targeting a Classification-oriented loss</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="Methods.html"
|
||||||
|
title="previous chapter">Quantification Methods</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="Plotting.html"
|
||||||
|
title="next chapter">Plotting</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Model-Selection.md.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Plotting.html" title="Plotting"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Methods.html" title="Quantification Methods"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Model Selection</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
|
@ -0,0 +1,350 @@
|
||||||
|
|
||||||
|
|
||||||
|
<!doctype html>
|
||||||
|
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />
|
||||||
|
|
||||||
|
<title>Plotting — QuaPy 0.1.7 documentation</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="_static/bizstyle.css" />
|
||||||
|
|
||||||
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||||
|
<script src="_static/jquery.js"></script>
|
||||||
|
<script src="_static/underscore.js"></script>
|
||||||
|
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||||
|
<script src="_static/doctools.js"></script>
|
||||||
|
<script src="_static/sphinx_highlight.js"></script>
|
||||||
|
<script src="_static/bizstyle.js"></script>
|
||||||
|
<link rel="index" title="Index" href="genindex.html" />
|
||||||
|
<link rel="search" title="Search" href="search.html" />
|
||||||
|
<link rel="next" title="quapy" href="modules.html" />
|
||||||
|
<link rel="prev" title="Model Selection" href="Model-Selection.html" />
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1.0" />
|
||||||
|
<!--[if lt IE 9]>
|
||||||
|
<script src="_static/css3-mediaqueries.js"></script>
|
||||||
|
<![endif]-->
|
||||||
|
</head><body>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
accesskey="I">index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="modules.html" title="quapy"
|
||||||
|
accesskey="N">next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Model-Selection.html" title="Model Selection"
|
||||||
|
accesskey="P">previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Plotting</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="document">
|
||||||
|
<div class="documentwrapper">
|
||||||
|
<div class="bodywrapper">
|
||||||
|
<div class="body" role="main">
|
||||||
|
|
||||||
|
<section id="plotting">
|
||||||
|
<h1>Plotting<a class="headerlink" href="#plotting" title="Permalink to this heading">¶</a></h1>
|
||||||
|
<p>The module <em>qp.plot</em> implements some basic plotting functions
|
||||||
|
that can help analyse the performance of a quantification method.</p>
|
||||||
|
<p>All plotting functions receive as inputs the outcomes of
|
||||||
|
some experiments and include, for each experiment,
|
||||||
|
the following three main arguments:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p><em>method_names</em> a list containing the names of the quantification methods</p></li>
|
||||||
|
<li><p><em>true_prevs</em> a list containing matrices of true prevalences</p></li>
|
||||||
|
<li><p><em>estim_prevs</em> a list containing matrices of estimated prevalences
|
||||||
|
(should be of the same shape as the corresponding matrix in <em>true_prevs</em>)</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>Note that a method (as indicated by a name in <em>method_names</em>) can
|
||||||
|
appear more than once. This could occur when various datasets are
|
||||||
|
involved in the experiments. In this case, all experiments for the
|
||||||
|
method will be merged and the plot will represent the method’s
|
||||||
|
performance across various datasets.</p>
|
||||||
|
<p>This is a very simple example of a valid input for the plotting functions:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">method_names</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'classify & count'</span><span class="p">,</span> <span class="s1">'EMQ'</span><span class="p">,</span> <span class="s1">'classify & count'</span><span class="p">]</span>
|
||||||
|
<span class="n">true_prevs</span> <span class="o">=</span> <span class="p">[</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">]]),</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">]]),</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">]]),</span>
|
||||||
|
<span class="p">]</span>
|
||||||
|
<span class="n">estim_prevs</span> <span class="o">=</span> <span class="p">[</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.45</span><span class="p">,</span> <span class="mf">0.55</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.6</span><span class="p">,</span> <span class="mf">0.4</span><span class="p">]]),</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.2</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">]]),</span>
|
||||||
|
<span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.9</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.3</span><span class="p">,</span> <span class="mf">0.7</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">]]),</span>
|
||||||
|
<span class="p">]</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>in which the <em>classify & count</em> has been tested in two datasets and
|
||||||
|
the <em>EMQ</em> method has been tested only in one dataset. For the first
|
||||||
|
experiment, only two (binary) quantifications have been tested,
|
||||||
|
while for the second and third experiments three instances have
|
||||||
|
been tested.</p>
|
||||||
|
<p>In general, we would like to test the performance of the
|
||||||
|
quantification methods across different scenarios showcasing
|
||||||
|
the accuracy of the quantifier in predicting class prevalences
|
||||||
|
for a wide range of prior distributions. This can easily be
|
||||||
|
achieved by means of the
|
||||||
|
<a class="reference external" href="https://github.com/HLT-ISTI/QuaPy/wiki/Protocols">artificial sampling protocol</a>
|
||||||
|
that is implemented in QuaPy.</p>
|
||||||
|
<p>The following code shows how to perform one simple experiment
|
||||||
|
in which the 4 <em>CC-variants</em>, all equipped with a linear SVM, are
|
||||||
|
applied to one binary dataset of reviews about <em>Kindle</em> devices and
|
||||||
|
tested across the entire spectrum of class priors (taking 21 splits
|
||||||
|
of the interval [0,1], i.e., using prevalence steps of 0.05, and
|
||||||
|
generating 100 random samples at each prevalence).</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">CC</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">PCC</span><span class="p">,</span> <span class="n">PACC</span>
|
||||||
|
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">LinearSVC</span>
|
||||||
|
|
||||||
|
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">500</span>
|
||||||
|
|
||||||
|
<span class="k">def</span> <span class="nf">gen_data</span><span class="p">():</span>
|
||||||
|
|
||||||
|
<span class="k">def</span> <span class="nf">base_classifier</span><span class="p">():</span>
|
||||||
|
<span class="k">return</span> <span class="n">LinearSVC</span><span class="p">(</span><span class="n">class_weight</span><span class="o">=</span><span class="s1">'balanced'</span><span class="p">)</span>
|
||||||
|
|
||||||
|
<span class="k">def</span> <span class="nf">models</span><span class="p">():</span>
|
||||||
|
<span class="k">yield</span> <span class="s1">'CC'</span><span class="p">,</span> <span class="n">CC</span><span class="p">(</span><span class="n">base_classifier</span><span class="p">())</span>
|
||||||
|
<span class="k">yield</span> <span class="s1">'ACC'</span><span class="p">,</span> <span class="n">ACC</span><span class="p">(</span><span class="n">base_classifier</span><span class="p">())</span>
|
||||||
|
<span class="k">yield</span> <span class="s1">'PCC'</span><span class="p">,</span> <span class="n">PCC</span><span class="p">(</span><span class="n">base_classifier</span><span class="p">())</span>
|
||||||
|
<span class="k">yield</span> <span class="s1">'PACC'</span><span class="p">,</span> <span class="n">PACC</span><span class="p">(</span><span class="n">base_classifier</span><span class="p">())</span>
|
||||||
|
|
||||||
|
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||||
|
|
||||||
|
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||||
|
|
||||||
|
<span class="k">for</span> <span class="n">method_name</span><span class="p">,</span> <span class="n">model</span> <span class="ow">in</span> <span class="n">models</span><span class="p">():</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||||
|
<span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span>
|
||||||
|
|
||||||
|
<span class="n">method_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method_name</span><span class="p">)</span>
|
||||||
|
<span class="n">true_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">true_prev</span><span class="p">)</span>
|
||||||
|
<span class="n">estim_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="p">)</span>
|
||||||
|
<span class="n">tr_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
|
||||||
|
|
||||||
|
<span class="k">return</span> <span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span>
|
||||||
|
|
||||||
|
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span> <span class="o">=</span> <span class="n">gen_data</span><span class="p">()</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>the plots that can be generated are explained below.</p>
|
||||||
|
<section id="diagonal-plot">
|
||||||
|
<h2>Diagonal Plot<a class="headerlink" href="#diagonal-plot" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>The <em>diagonal</em> plot shows a very insightful view of the
|
||||||
|
quantifier’s performance. It plots the predicted class
|
||||||
|
prevalence (in the y-axis) against the true class prevalence
|
||||||
|
(in the x-axis). Unfortunately, it is limited to binary quantification,
|
||||||
|
although one can simply generate as many <em>diagonal</em> plots as
|
||||||
|
classes there are by indicating which class should be considered
|
||||||
|
the target of the plot.</p>
|
||||||
|
<p>The following call will produce the plot:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">qp</span><span class="o">.</span><span class="n">plot</span><span class="o">.</span><span class="n">binary_diagonal</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">train_prev</span><span class="o">=</span><span class="n">tr_prevs</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">savepath</span><span class="o">=</span><span class="s1">'./plots/bin_diag.png'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>the last argument is optional, and indicates the path where to save
|
||||||
|
the plot (the file extension will determine the format – typical extensions
|
||||||
|
are ‘.png’ or ‘.pdf’). If this path is not provided, then the plot
|
||||||
|
will be shown but not saved.
|
||||||
|
The resulting plot should look like:</p>
|
||||||
|
<p><img alt="diagonal plot on Kindle" src="_images/bin_diag.png" /></p>
|
||||||
|
<p>Note that in this case, we are also indicating the training
|
||||||
|
prevalence, which is plotted on the diagonal as a cyan dot.
|
||||||
|
The color bands indicate the standard deviations of the predictions,
|
||||||
|
and can be hidden by setting the argument <em>show_std=False</em> (see
|
||||||
|
the complete list of arguments in the documentation).</p>
|
||||||
|
<p>Finally, note how most quantifiers, and especially the “unadjusted”
|
||||||
|
variants CC and PCC, are strongly biased towards the
|
||||||
|
prevalence seen during training.</p>
|
||||||
|
</section>
|
||||||
|
<section id="quantification-bias">
|
||||||
|
<h2>Quantification bias<a class="headerlink" href="#quantification-bias" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>This plot aims at evincing the bias that any quantifier
|
||||||
|
displays with respect to the training prevalences by
|
||||||
|
means of <a class="reference external" href="https://en.wikipedia.org/wiki/Box_plot">box plots</a>.
|
||||||
|
This plot can be generated by:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">qp</span><span class="o">.</span><span class="n">plot</span><span class="o">.</span><span class="n">binary_bias_global</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="s1">'./plots/bin_bias.png'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>and should look like:</p>
|
||||||
|
<p><img alt="bias plot on Kindle" src="_images/bin_bias.png" /></p>
|
||||||
|
<p>The box plots show some interesting facts:</p>
|
||||||
|
<ul class="simple">
|
||||||
|
<li><p>all methods are biased towards the training prevalence but especially
|
||||||
|
so CC and PCC (an unbiased quantifier would have a box centered at 0)</p></li>
|
||||||
|
<li><p>the bias is always positive, indicating that all methods tend to
|
||||||
|
overestimate the positive class prevalence</p></li>
|
||||||
|
<li><p>CC and PCC have high variability while ACC and especially PACC exhibit
|
||||||
|
lower variability.</p></li>
|
||||||
|
</ul>
|
||||||
|
<p>Again, these plots could be generated for experiments ranging across
|
||||||
|
different datasets, and the plot will merge all data accordingly.</p>
|
||||||
|
<p>Another illustrative example can be shown that consists of
|
||||||
|
training different CC quantifiers at different
|
||||||
|
(artificially sampled) training prevalences.
|
||||||
|
For this example, we generate training samples of 5000
|
||||||
|
documents containing 10%, 20%, …, 90% of positives from the
|
||||||
|
IMDb dataset, and generate the bias plot again.
|
||||||
|
This example can be run by rewriting the <em>gen_data()</em> function
|
||||||
|
like this:</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">gen_data</span><span class="p">():</span>
|
||||||
|
|
||||||
|
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||||
|
<span class="n">model</span> <span class="o">=</span> <span class="n">CC</span><span class="p">(</span><span class="n">LinearSVC</span><span class="p">())</span>
|
||||||
|
|
||||||
|
<span class="n">method_data</span> <span class="o">=</span> <span class="p">[]</span>
|
||||||
|
<span class="k">for</span> <span class="n">training_prevalence</span> <span class="ow">in</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.9</span><span class="p">,</span> <span class="mi">9</span><span class="p">):</span>
|
||||||
|
<span class="n">training_size</span> <span class="o">=</span> <span class="mi">5000</span>
|
||||||
|
<span class="c1"># since the problem is binary, it suffices to specify the negative prevalence, since the positive is constrained</span>
|
||||||
|
<span class="n">train_sample</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">training_size</span><span class="p">,</span> <span class="mi">1</span><span class="o">-</span><span class="n">training_prevalence</span><span class="p">)</span>
|
||||||
|
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train_sample</span><span class="p">)</span>
|
||||||
|
<span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span>
|
||||||
|
<span class="n">method_name</span> <span class="o">=</span> <span class="s1">'CC$_{'</span><span class="o">+</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="nb">int</span><span class="p">(</span><span class="mi">100</span><span class="o">*</span><span class="n">training_prevalence</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span> <span class="o">+</span> <span class="s1">'\%}$'</span>
|
||||||
|
<span class="n">method_data</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">method_name</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">,</span> <span class="n">train_sample</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()))</span>
|
||||||
|
|
||||||
|
<span class="k">return</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="n">method_data</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p>and the plot should now look like:</p>
|
||||||
|
<p><img alt="bias plot on IMDb" src="_images/bin_bias_cc.png" /></p>
|
||||||
|
<p>which clearly shows a negative bias for CC variants trained on
|
||||||
|
data containing more negatives (i.e., < 50%) and positive biases
|
||||||
|
in cases containing more positives (i.e., >50%). The CC trained
|
||||||
|
at 50% behaves as an unbiased estimator of the positive class
|
||||||
|
prevalence.</p>
|
||||||
|
<p>The function <em>qp.plot.binary_bias_bins</em> allows the user to
|
||||||
|
generate box plots broken down by bins of true test prevalence.
|
||||||
|
To this aim, an argument <em>nbins</em> is passed which indicates
|
||||||
|
how many equal-width subintervals to take. For example
|
||||||
|
the following plot is produced for <em>nbins=3</em>:</p>
|
||||||
|
<p><img alt="bias plot on IMDb" src="_images/bin_bias_bin_cc.png" /></p>
|
||||||
|
<p>Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display
|
||||||
|
a positive bias (or a tendency to overestimate) in cases of low prevalence
|
||||||
|
(i.e., when the true prevalence of the positive class is below 33%),
|
||||||
|
and a negative bias (or a tendency to underestimate) in cases of high prevalence
|
||||||
|
(i.e., when the true prevalence is beyond 67%).</p>
|
||||||
|
<p>Out of curiosity, the diagonal plot for this experiment looks like:</p>
|
||||||
|
<p><img alt="diag plot on IMDb" src="_images/bin_diag_cc.png" /></p>
|
||||||
|
<p>showing pretty clearly the dependency of CC on the prior probabilities
|
||||||
|
of the labeled set it was trained on.</p>
|
||||||
|
</section>
|
||||||
|
<section id="error-by-drift">
|
||||||
|
<h2>Error by Drift<a class="headerlink" href="#error-by-drift" title="Permalink to this heading">¶</a></h2>
|
||||||
|
<p>The plots discussed above are useful for analyzing and comparing
|
||||||
|
the performance of different quantification methods, but are
|
||||||
|
limited to the binary case. The “error by drift” is a plot
|
||||||
|
that shows the error in predictions as a function of the
|
||||||
|
(prior probability) drift between each test sample and the
|
||||||
|
training set. Interestingly, the error and drift can both be measured
|
||||||
|
in terms of any evaluation measure for quantification (like the
|
||||||
|
ones available in <em>qp.error</em>) and can thus be computed
|
||||||
|
irrespectively of the number of classes.</p>
|
||||||
|
<p>The following shows how to generate the plot for the 4 CC variants,
|
||||||
|
using 10 bins for the drift
|
||||||
|
and <em>absolute error</em> as the measure of the error (the
|
||||||
|
drift in the x-axis is always computed in terms of <em>absolute error</em> since
|
||||||
|
other errors are harder to interpret):</p>
|
||||||
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">qp</span><span class="o">.</span><span class="n">plot</span><span class="o">.</span><span class="n">error_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
|
||||||
|
<span class="n">error_name</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="s1">'./plots/err_drift.png'</span><span class="p">)</span>
|
||||||
|
</pre></div>
|
||||||
|
</div>
|
||||||
|
<p><img alt="error by drift plot" src="_images/err_drift.png" /></p>
|
||||||
|
<p>Note that all methods work reasonably well in cases of low prevalence
|
||||||
|
drift (i.e., any CC-variant is a good quantifier whenever the IID
|
||||||
|
assumption is approximately preserved). The higher the drift, the worse
|
||||||
|
those quantifiers tend to perform, although it is clear that PACC
|
||||||
|
yields the lowest error for the most difficult cases.</p>
|
||||||
|
<p>Remember that any plot can be generated <em>across many datasets</em>, and
|
||||||
|
that this would probably result in a more solid comparison.
|
||||||
|
In those cases, however, it is likely that the variances of each
|
||||||
|
method get higher, to the detriment of the visualization.
|
||||||
|
We recommend setting <em>show_std=False</em> in those cases
|
||||||
|
in order to hide the color bands.</p>
|
||||||
|
</section>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
|
||||||
|
<div class="sphinxsidebarwrapper">
|
||||||
|
<div>
|
||||||
|
<h3><a href="index.html">Table of Contents</a></h3>
|
||||||
|
<ul>
|
||||||
|
<li><a class="reference internal" href="#">Plotting</a><ul>
|
||||||
|
<li><a class="reference internal" href="#diagonal-plot">Diagonal Plot</a></li>
|
||||||
|
<li><a class="reference internal" href="#quantification-bias">Quantification bias</a></li>
|
||||||
|
<li><a class="reference internal" href="#error-by-drift">Error by Drift</a></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Previous topic</h4>
|
||||||
|
<p class="topless"><a href="Model-Selection.html"
|
||||||
|
title="previous chapter">Model Selection</a></p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h4>Next topic</h4>
|
||||||
|
<p class="topless"><a href="modules.html"
|
||||||
|
title="next chapter">quapy</a></p>
|
||||||
|
</div>
|
||||||
|
<div role="note" aria-label="source link">
|
||||||
|
<h3>This Page</h3>
|
||||||
|
<ul class="this-page-menu">
|
||||||
|
<li><a href="_sources/Plotting.md.txt"
|
||||||
|
rel="nofollow">Show Source</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div id="searchbox" style="display: none" role="search">
|
||||||
|
<h3 id="searchlabel">Quick search</h3>
|
||||||
|
<div class="searchformwrapper">
|
||||||
|
<form class="search" action="search.html" method="get">
|
||||||
|
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
|
||||||
|
<input type="submit" value="Go" />
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<script>document.getElementById('searchbox').style.display = "block"</script>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="clearer"></div>
|
||||||
|
</div>
|
||||||
|
<div class="related" role="navigation" aria-label="related navigation">
|
||||||
|
<h3>Navigation</h3>
|
||||||
|
<ul>
|
||||||
|
<li class="right" style="margin-right: 10px">
|
||||||
|
<a href="genindex.html" title="General Index"
|
||||||
|
>index</a></li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="py-modindex.html" title="Python Module Index"
|
||||||
|
>modules</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="modules.html" title="quapy"
|
||||||
|
>next</a> |</li>
|
||||||
|
<li class="right" >
|
||||||
|
<a href="Model-Selection.html" title="Model Selection"
|
||||||
|
>previous</a> |</li>
|
||||||
|
<li class="nav-item nav-item-0"><a href="index.html">QuaPy 0.1.7 documentation</a> »</li>
|
||||||
|
<li class="nav-item nav-item-this"><a href="">Plotting</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="footer" role="contentinfo">
|
||||||
|
© Copyright 2021, Alejandro Moreo.
|
||||||
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 5.3.0.
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
Before Width: | Height: | Size: 62 KiB After Width: | Height: | Size: 62 KiB |
|
Before Width: | Height: | Size: 108 KiB After Width: | Height: | Size: 108 KiB |
|
Before Width: | Height: | Size: 71 KiB After Width: | Height: | Size: 71 KiB |
|
Before Width: | Height: | Size: 185 KiB After Width: | Height: | Size: 185 KiB |
|
Before Width: | Height: | Size: 337 KiB After Width: | Height: | Size: 337 KiB |
|
Before Width: | Height: | Size: 243 KiB After Width: | Height: | Size: 243 KiB |
|
|
@ -1,124 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>Overview: module code — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
|
||||||
<script src="../_static/jquery.js"></script>
|
|
||||||
<script src="../_static/underscore.js"></script>
|
|
||||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../_static/doctools.js"></script>
|
|
||||||
<script src="../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item active">Overview: module code</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>All modules for which code is available</h1>
|
|
||||||
<ul><li><a href="quapy/classification/calibration.html">quapy.classification.calibration</a></li>
|
|
||||||
<li><a href="quapy/classification/methods.html">quapy.classification.methods</a></li>
|
|
||||||
<li><a href="quapy/classification/neural.html">quapy.classification.neural</a></li>
|
|
||||||
<li><a href="quapy/classification/svmperf.html">quapy.classification.svmperf</a></li>
|
|
||||||
<li><a href="quapy/data/base.html">quapy.data.base</a></li>
|
|
||||||
<li><a href="quapy/data/datasets.html">quapy.data.datasets</a></li>
|
|
||||||
<li><a href="quapy/data/preprocessing.html">quapy.data.preprocessing</a></li>
|
|
||||||
<li><a href="quapy/data/reader.html">quapy.data.reader</a></li>
|
|
||||||
<li><a href="quapy/error.html">quapy.error</a></li>
|
|
||||||
<li><a href="quapy/evaluation.html">quapy.evaluation</a></li>
|
|
||||||
<li><a href="quapy/functional.html">quapy.functional</a></li>
|
|
||||||
<li><a href="quapy/method/_kdey.html">quapy.method._kdey</a></li>
|
|
||||||
<li><a href="quapy/method/_neural.html">quapy.method._neural</a></li>
|
|
||||||
<li><a href="quapy/method/_threshold_optim.html">quapy.method._threshold_optim</a></li>
|
|
||||||
<li><a href="quapy/method/aggregative.html">quapy.method.aggregative</a></li>
|
|
||||||
<li><a href="quapy/method/base.html">quapy.method.base</a></li>
|
|
||||||
<li><a href="quapy/method/meta.html">quapy.method.meta</a></li>
|
|
||||||
<li><a href="quapy/method/non_aggregative.html">quapy.method.non_aggregative</a></li>
|
|
||||||
<li><a href="quapy/model_selection.html">quapy.model_selection</a></li>
|
|
||||||
<li><a href="quapy/plot.html">quapy.plot</a></li>
|
|
||||||
<li><a href="quapy/protocol.html">quapy.protocol</a></li>
|
|
||||||
<li><a href="quapy/util.html">quapy.util</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,319 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.classification.calibration — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.classification.calibration</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.classification.calibration</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">abstention.calibration</span> <span class="kn">import</span> <span class="n">NoBiasVectorScaling</span><span class="p">,</span> <span class="n">TempScaling</span><span class="p">,</span> <span class="n">VectorScaling</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">clone</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">cross_val_predict</span><span class="p">,</span> <span class="n">train_test_split</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="c1"># Wrappers of calibration defined by Alexandari et al. in paper <http://proceedings.mlr.press/v119/alexandari20a.html></span>
|
|
||||||
<span class="c1"># requires "pip install abstention"</span>
|
|
||||||
<span class="c1"># see https://github.com/kundajelab/abstention</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifier"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifier">[docs]</a><span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifier</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract class for (re)calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration</span>
|
|
||||||
<span class="sd"> is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR.</span>
|
|
||||||
<span class="sd"> <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">pass</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase">[docs]</a><span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifierBase</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">RecalibratedProbabilisticClassifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies a (re)calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_.</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
|
||||||
<span class="sd"> :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory)</span>
|
|
||||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
|
||||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
|
||||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
|
||||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
|
||||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None</span>
|
|
||||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">calibrator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">calibrator</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the calibration for the probabilistic classifier.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
|
||||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'wrong value for val_split: the number of folds must be > 2'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_cv</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="mi">0</span> <span class="o"><</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'wrong value for val_split: the proportion of validation documents must be in (0,1)'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_tr_val</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_cv"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv">[docs]</a> <span class="k">def</span> <span class="nf">fit_cv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all</span>
|
|
||||||
<span class="sd"> training instances via cross-validation, and then retrains the classifier on all training instances.</span>
|
|
||||||
<span class="sd"> The posterior probabilities thus generated are used for calibrating the outputs of the classifier.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
|
||||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'predict_proba'</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">y</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_tr_val"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val">[docs]</a> <span class="k">def</span> <span class="nf">fit_tr_val</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a</span>
|
|
||||||
<span class="sd"> training and a validation set, and then uses the training samples to learn a classifier, which is then used</span>
|
|
||||||
<span class="sd"> to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate</span>
|
|
||||||
<span class="sd"> the classifier. The classifier is not retrained on the whole dataset.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
|
||||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xva</span><span class="p">,</span> <span class="n">ytr</span><span class="p">,</span> <span class="n">yva</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">ytr</span><span class="p">)</span>
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">Xva</span><span class="p">)</span>
|
|
||||||
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">yva</span><span class="p">))</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">yva</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict">[docs]</a> <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts class labels for the data instances in `X`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples,)` with the class label predictions</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict_proba"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba">[docs]</a> <span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates posterior probabilities for the data instances in `X`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the classes on which the classifier has been trained</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_classes)`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NBVSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.NBVSCalibration">[docs]</a><span class="k">class</span> <span class="nc">NBVSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
|
||||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
|
||||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
|
||||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
|
||||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
|
||||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">NoBiasVectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="BCTSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.BCTSCalibration">[docs]</a><span class="k">class</span> <span class="nc">BCTSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
|
||||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
|
||||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
|
||||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
|
||||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
|
||||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">,</span> <span class="n">bias_positions</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.TSCalibration">[docs]</a><span class="k">class</span> <span class="nc">TSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
|
||||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
|
||||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
|
||||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
|
||||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
|
||||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="VSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.VSCalibration">[docs]</a><span class="k">class</span> <span class="nc">VSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in</span>
|
|
||||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
|
||||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
|
||||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
|
||||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
|
||||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
|
||||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">VectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,220 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.classification.methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.classification.methods</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.classification.methods</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <span class="n">TruncatedSVD</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">LowRankLogisticRegression</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)</span>
|
|
||||||
<span class="sd"> that also generates embedded inputs (i.e., that implements `transform`), as those required for</span>
|
|
||||||
<span class="sd"> :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating</span>
|
|
||||||
<span class="sd"> :class:`quapy.method.neural.QuaNet` on array-like real-valued instances.</span>
|
|
||||||
<span class="sd"> The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`</span>
|
|
||||||
<span class="sd"> while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_components: the number of principal components to retain</span>
|
|
||||||
<span class="sd"> :param kwargs: parameters for the</span>
|
|
||||||
<span class="sd"> `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_components</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">n_components</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.get_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.get_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Get hyper-parameters for this estimator.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'n_components'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">}</span>
|
|
||||||
<span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
|
|
||||||
<span class="k">return</span> <span class="n">params</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.set_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.set_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Set the parameters of this estimator.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param params: a `**kwargs` dictionary with the estimator parameters for</span>
|
|
||||||
<span class="sd"> `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__</span>
|
|
||||||
<span class="sd"> and, optionally, `n_components` for `TruncatedSVD`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">params_</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="s1">'n_components'</span> <span class="ow">in</span> <span class="n">params_</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">params_</span><span class="p">[</span><span class="s1">'n_components'</span><span class="p">]</span>
|
|
||||||
<span class="k">del</span> <span class="n">params_</span><span class="p">[</span><span class="s1">'n_components'</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params_</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.fit">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.fit">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fit the model according to the given training data. The fit consists of</span>
|
|
||||||
<span class="sd"> fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the instances</span>
|
|
||||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
|
||||||
<span class="sd"> :return: `self`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">nF</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="n">nF</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="n">TruncatedSVD</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.predict">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts labels for the instances `X` embedded into the low-rank space.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
|
||||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
|
||||||
<span class="sd"> instances in `X`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.predict_proba">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict_proba">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts posterior probabilities for the instances `X` embedded into the low-rank space.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LowRankLogisticRegression.transform">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.transform">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if</span>
|
|
||||||
<span class="sd"> `n_components` >= `X.shape[1]`.</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to embed</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, n_components)` with the embedded instances</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">X</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,715 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.classification.neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.classification.neural</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.classification.neural</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">torch</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">accuracy_score</span><span class="p">,</span> <span class="n">f1_score</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">torch.nn.utils.rnn</span> <span class="kn">import</span> <span class="n">pad_sequence</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">NeuralClassifierTrainer</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Trains a neural network for text classification.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param net: an instance of `TextClassifierNet` implementing the forward pass</span>
|
|
||||||
<span class="sd"> :param lr: learning rate (default 1e-3)</span>
|
|
||||||
<span class="sd"> :param weight_decay: weight decay (default 0)</span>
|
|
||||||
<span class="sd"> :param patience: number of epochs that do not show any improvement in validation</span>
|
|
||||||
<span class="sd"> to wait before applying early stop (default 10)</span>
|
|
||||||
<span class="sd"> :param epochs: maximum number of training epochs (default 200)</span>
|
|
||||||
<span class="sd"> :param batch_size: batch size for training (default 64)</span>
|
|
||||||
<span class="sd"> :param batch_size_test: batch size for test (default 512)</span>
|
|
||||||
<span class="sd"> :param padding_length: maximum number of tokens to consider in a document (default 300)</span>
|
|
||||||
<span class="sd"> :param device: specify 'cuda' (default) for enabling gpu, or 'cpu'</span>
|
|
||||||
<span class="sd"> :param checkpointpath: where to store the parameters of the best model found so far</span>
|
|
||||||
<span class="sd"> according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat')</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">net</span><span class="p">:</span> <span class="s1">'TextClassifierNet'</span><span class="p">,</span>
|
|
||||||
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
|
|
||||||
<span class="n">weight_decay</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
|
||||||
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
|
||||||
<span class="n">epochs</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span>
|
|
||||||
<span class="n">batch_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
|
||||||
<span class="n">batch_size_test</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span>
|
|
||||||
<span class="n">padding_length</span><span class="o">=</span><span class="mi">300</span><span class="p">,</span>
|
|
||||||
<span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">,</span>
|
|
||||||
<span class="n">checkpointpath</span><span class="o">=</span><span class="s1">'../checkpoint/classifier_net.dat'</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">TextClassifierNet</span><span class="p">),</span> <span class="sa">f</span><span class="s1">'net is not an instance of </span><span class="si">{</span><span class="n">TextClassifierNet</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">vocabulary_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="o">=</span><span class="p">{</span>
|
|
||||||
<span class="s1">'lr'</span><span class="p">:</span> <span class="n">lr</span><span class="p">,</span>
|
|
||||||
<span class="s1">'weight_decay'</span><span class="p">:</span> <span class="n">weight_decay</span><span class="p">,</span>
|
|
||||||
<span class="s1">'patience'</span><span class="p">:</span> <span class="n">patience</span><span class="p">,</span>
|
|
||||||
<span class="s1">'epochs'</span><span class="p">:</span> <span class="n">epochs</span><span class="p">,</span>
|
|
||||||
<span class="s1">'batch_size'</span><span class="p">:</span> <span class="n">batch_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'batch_size_test'</span><span class="p">:</span> <span class="n">batch_size_test</span><span class="p">,</span>
|
|
||||||
<span class="s1">'padding_length'</span><span class="p">:</span> <span class="n">padding_length</span><span class="p">,</span>
|
|
||||||
<span class="s1">'device'</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span> <span class="o">=</span> <span class="n">checkpointpath</span>
|
|
||||||
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[NeuralNetwork running on </span><span class="si">{</span><span class="n">device</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">checkpointpath</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.reset_net_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.reset_net_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Reinitialize the network parameters</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param vocab_size: the size of the vocabulary</span>
|
|
||||||
<span class="sd"> :param n_classes: the number of target classes</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">xavier_uniform</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.get_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.get_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Get hyper-parameters for this estimator</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">(),</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">}</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.set_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Set the parameters of this trainer and the learner it is training.</span>
|
|
||||||
<span class="sd"> In this current version, parameter names for the trainer and learner should</span>
|
|
||||||
<span class="sd"> be disjoint.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param params: a `**kwargs` dictionary with the parameters</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
|
||||||
<span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
|
||||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the use of parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is ambiguous since it can refer to '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'a parameters of the Trainer or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is not valid'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span><span class="p">:</span>
|
|
||||||
<span class="n">trainer_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">learner_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="n">trainer_hyperparams</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="n">learner_hyperparams</span> </div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">""" Gets the device in which the network is allocated</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: device</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">device</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_train_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
|
|
||||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
|
|
||||||
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
|
||||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
|
|
||||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
|
||||||
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"loss"</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
|
||||||
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
|
||||||
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"acc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"f1"</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_test_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
|
||||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
|
|
||||||
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
||||||
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
|
|
||||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
|
|
||||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
|
||||||
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
|
||||||
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"loss"</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"acc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
|
||||||
<span class="n">status</span><span class="p">[</span><span class="s2">"f1"</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__update_progress_bar</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
|
||||||
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">] training epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'tr-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'tr-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"acc"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'tr-macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"f1"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'patience=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'val-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'val-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"acc"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"f1"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">%'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.fit">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.fit">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the model according to the given training data.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
|
||||||
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
|
|
||||||
<span class="sd"> :param val_split: proportion of training documents to be taken as the validation set (default 0.3)</span>
|
|
||||||
<span class="sd"> :return:</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">val_split</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">classes_</span>
|
|
||||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
|
||||||
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">train_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
|
||||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
|
||||||
<span class="n">valid_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">val</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">val</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
|
||||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'tr'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'acc'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">},</span>
|
|
||||||
<span class="s1">'va'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'acc'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">}}</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lr'</span><span class="p">],</span> <span class="n">weight_decay</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'weight_decay'</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'patience'</span><span class="p">],</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'epochs'</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">))</span> <span class="k">as</span> <span class="n">pbar</span><span class="p">:</span>
|
|
||||||
<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="n">pbar</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">train_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_test_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va'</span><span class="p">][</span><span class="s1">'f1'</span><span class="p">],</span> <span class="n">epoch</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
|
||||||
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'training ended by patience exhasted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'for epoch </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
|
|
||||||
<span class="k">break</span>
|
|
||||||
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'performing one training pass over the validation set...'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[done]'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.predict">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts labels for the instances</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
|
||||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
|
||||||
<span class="sd"> instances in `instances`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">),</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.predict_proba">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict_proba">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts posterior probabilities for the instances</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: the instances to classify (list of lists of indexed tokens)</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
|
||||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
|
||||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
|
||||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">]):</span>
|
|
||||||
<span class="n">posteriors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">xi</span><span class="p">))</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NeuralClassifierTrainer.transform">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.transform">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the embeddings of the instances</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, embed_size)` with the embedded instances,</span>
|
|
||||||
<span class="sd"> where `embed_size` is defined by the classification network</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
|
||||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
|
||||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
||||||
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
|
||||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">]):</span>
|
|
||||||
<span class="n">embeddings</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">())</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TorchDataset">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">TorchDataset</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Transforms labelled instances into a Torch :class:`torch.utils.data.DataLoader` object</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
|
||||||
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">labels</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="p">{</span><span class="s1">'doc'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">],</span> <span class="s1">'label'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span><span class="p">}</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TorchDataset.asDataloader">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset.asDataloader">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">asDataloader</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="p">,</span> <span class="n">pad_length</span><span class="p">,</span> <span class="n">device</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Converts the labelled collection into a Torch DataLoader with dynamic padding for</span>
|
|
||||||
<span class="sd"> the batch</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param batch_size: batch size</span>
|
|
||||||
<span class="sd"> :param shuffle: whether or not to shuffle instances</span>
|
|
||||||
<span class="sd"> :param pad_length: the maximum length for the list of tokens (dynamic padding is</span>
|
|
||||||
<span class="sd"> applied, meaning that if the longest document in the batch is shorter than</span>
|
|
||||||
<span class="sd"> `pad_length`, then the batch is padded up to its length, and not to `pad_length`)</span>
|
|
||||||
<span class="sd"> :param device: whether to allocate tensors in cpu or in cuda</span>
|
|
||||||
<span class="sd"> :return: a :class:`torch.utils.data.DataLoader` object</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="nf">collate</span><span class="p">(</span><span class="n">batch</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">LongTensor</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="s1">'doc'</span><span class="p">][:</span><span class="n">pad_length</span><span class="p">])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pad_sequence</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">padding_value</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_INDEX'</span><span class="p">])</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="n">targets</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="s1">'label'</span><span class="p">]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
|
|
||||||
<span class="k">if</span> <span class="n">targets</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">data</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">targets</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">targets</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">long</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="p">[</span><span class="n">data</span><span class="p">,</span> <span class="n">targets</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">torchDataset</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">torchDataset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">collate_fn</span><span class="o">=</span><span class="n">collate</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">TextClassifierNet</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract Text classifier (`torch.nn.Module`)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.document_embedding">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.document_embedding">[docs]</a>
|
|
||||||
<span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
|
||||||
<span class="sd"> next-to-last layer).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
|
||||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
|
||||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
|
||||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
|
||||||
<span class="sd"> dimensionality of the embedding</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.forward">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.forward">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Performs the forward pass.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
|
||||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
|
||||||
<span class="sd"> :return: a tensor of shape `(n_instances, n_classes)` with the decision scores</span>
|
|
||||||
<span class="sd"> for each of the instances and classes</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">doc_embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">doc_embedded</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.dimensions">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.dimensions">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">dimensions</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Gets the number of dimensions of the embedding space</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.predict_proba">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.predict_proba">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts posterior probabilities for the instances in `x`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`</span>
|
|
||||||
<span class="sd"> where `n_instances` is the number of instances in the batch, and `pad_length`</span>
|
|
||||||
<span class="sd"> is the length of the padding in the batch</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.xavier_uniform">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.xavier_uniform">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">xavier_uniform</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Performs Xavier initialization of the network parameters</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
|
|
||||||
<span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">></span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">p</span><span class="o">.</span><span class="n">requires_grad</span><span class="p">:</span>
|
|
||||||
<span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">xavier_uniform_</span><span class="p">(</span><span class="n">p</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TextClassifierNet.get_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.get_params">[docs]</a>
|
|
||||||
<span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return the size of the vocabulary</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LSTMnet">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">LSTMnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
|
|
||||||
<span class="sd"> Long Short Term Memory networks.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
|
|
||||||
<span class="sd"> :param n_classes: number of target classes</span>
|
|
||||||
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
|
|
||||||
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
|
|
||||||
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
|
|
||||||
<span class="sd"> :param lstm_class_nlayers: number of LSTM layers (default 1)</span>
|
|
||||||
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
|
|
||||||
<span class="s1">'embedding_size'</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'hidden_size'</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'repr_size'</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'lstm_class_nlayers'</span><span class="p">:</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span>
|
|
||||||
<span class="s1">'drop_p'</span><span class="p">:</span> <span class="n">drop_p</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">embedding_size</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span> <span class="n">dropout</span><span class="o">=</span><span class="n">drop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">set_size</span><span class="p">):</span>
|
|
||||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span>
|
|
||||||
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lstm_class_nlayers'</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'hidden_size'</span><span class="p">])</span>
|
|
||||||
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lstm_class_nlayers'</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'hidden_size'</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
|
|
||||||
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LSTMnet.document_embedding">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.document_embedding">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
|
||||||
<span class="sd"> next-to-last layer).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
|
||||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
|
||||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
|
||||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
|
||||||
<span class="sd"> dimensionality of the embedding</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
|
||||||
<span class="n">rnn_output</span><span class="p">,</span> <span class="n">rnn_hidden</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embedded</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">__init_hidden</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">0</span><span class="p">]))</span>
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">abstracted</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LSTMnet.get_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.get_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return the size of the vocabulary</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="CNNnet">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">CNNnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
|
|
||||||
<span class="sd"> Convolutional Neural Networks.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
|
|
||||||
<span class="sd"> :param n_classes: number of target classes</span>
|
|
||||||
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
|
|
||||||
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
|
|
||||||
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
|
|
||||||
<span class="sd"> :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of</span>
|
|
||||||
<span class="sd"> consecutive tokens that each kernel covers</span>
|
|
||||||
<span class="sd"> :param stride: convolutional stride (default 1)</span>
|
|
||||||
<span class="sd"> :param padding: convolutional padding (default 0)</span>
|
|
||||||
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
|
||||||
<span class="n">kernel_heights</span><span class="o">=</span><span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">CNNnet</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
|
|
||||||
<span class="s1">'embedding_size'</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'hidden_size'</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'repr_size'</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'kernel_heights'</span><span class="p">:</span><span class="n">kernel_heights</span><span class="p">,</span>
|
|
||||||
<span class="s1">'stride'</span><span class="p">:</span> <span class="n">stride</span><span class="p">,</span>
|
|
||||||
<span class="s1">'drop_p'</span><span class="p">:</span> <span class="n">drop_p</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
|
|
||||||
<span class="n">in_channels</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv3</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">kernel_heights</span><span class="p">)</span> <span class="o">*</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__conv_block</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">,</span> <span class="n">conv_layer</span><span class="p">):</span>
|
|
||||||
<span class="n">conv_out</span> <span class="o">=</span> <span class="n">conv_layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span> <span class="c1"># conv_out.size() = (batch_size, out_channels, dim, 1)</span>
|
|
||||||
<span class="n">activation</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">conv_out</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span> <span class="c1"># activation.size() = (batch_size, out_channels, dim1)</span>
|
|
||||||
<span class="n">max_out</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max_pool1d</span><span class="p">(</span><span class="n">activation</span><span class="p">,</span> <span class="n">activation</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">2</span><span class="p">])</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># maxpool_out.size() = (batch_size, out_channels)</span>
|
|
||||||
<span class="k">return</span> <span class="n">max_out</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="CNNnet.document_embedding">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.document_embedding">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
|
||||||
<span class="sd"> next-to-last layer).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param input: a batch of instances, typically generated by a torch `DataLoader`</span>
|
|
||||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
|
||||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
|
||||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
|
||||||
<span class="sd"> dimensionality of the embedding</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="nb">input</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
|
||||||
<span class="nb">input</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># input.size() = (batch_size, 1, num_seq, embedding_length)</span>
|
|
||||||
|
|
||||||
<span class="n">max_out1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv1</span><span class="p">)</span>
|
|
||||||
<span class="n">max_out2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv2</span><span class="p">)</span>
|
|
||||||
<span class="n">max_out3</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv3</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">all_out</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">max_out1</span><span class="p">,</span> <span class="n">max_out2</span><span class="p">,</span> <span class="n">max_out3</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span> <span class="c1"># all_out.size() = (batch_size, num_kernels*out_channels)</span>
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">all_out</span><span class="p">))</span> <span class="c1"># (batch_size, num_kernels*out_channels)</span>
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">abstracted</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="CNNnet.get_params">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.get_params">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return the size of the vocabulary</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,268 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.classification.svmperf — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.classification.svmperf</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.classification.svmperf</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">random</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">shutil</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">subprocess</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">tempfile</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">os</span> <span class="kn">import</span> <span class="n">remove</span><span class="p">,</span> <span class="n">makedirs</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span><span class="p">,</span> <span class="n">exists</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">PIPE</span><span class="p">,</span> <span class="n">STDOUT</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">dump_svmlight_file</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="SVMperf">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">SVMperf</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""A wrapper for the `SVM-perf package <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`__ by Thorsten Joachims.</span>
|
|
||||||
<span class="sd"> When using losses for quantification, the source code has to be patched. See</span>
|
|
||||||
<span class="sd"> the `installation documentation <https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses>`__</span>
|
|
||||||
<span class="sd"> for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> References:</span>
|
|
||||||
|
|
||||||
<span class="sd"> * `Esuli et al.2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`__</span>
|
|
||||||
<span class="sd"> * `Barranquero et al.2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`__</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`</span>
|
|
||||||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
|
||||||
<span class="sd"> :param verbose: set to True to print svm-perf std outputs</span>
|
|
||||||
<span class="sd"> :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".</span>
|
|
||||||
<span class="sd"> :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory</span>
|
|
||||||
<span class="sd"> (temporary directories are automatically deleted)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="c1"># losses with their respective codes in svm_perf implementation</span>
|
|
||||||
<span class="n">valid_losses</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'01'</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'kld'</span><span class="p">:</span><span class="mi">12</span><span class="p">,</span> <span class="s1">'nkld'</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span> <span class="s1">'q'</span><span class="p">:</span><span class="mi">22</span><span class="p">,</span> <span class="s1">'qacc'</span><span class="p">:</span><span class="mi">23</span><span class="p">,</span> <span class="s1">'qf1'</span><span class="p">:</span><span class="mi">24</span><span class="p">,</span> <span class="s1">'qgm'</span><span class="p">:</span><span class="mi">25</span><span class="p">,</span> <span class="s1">'mae'</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span> <span class="s1">'mrae'</span><span class="p">:</span><span class="mi">27</span><span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">svmperf_base</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'01'</span><span class="p">,</span> <span class="n">host_folder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">),</span> <span class="sa">f</span><span class="s1">'path </span><span class="si">{</span><span class="n">svmperf_base</span><span class="si">}</span><span class="s1"> does not seem to point to a valid path'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span> <span class="o">=</span> <span class="n">svmperf_base</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">C</span> <span class="o">=</span> <span class="n">C</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="o">=</span> <span class="n">host_folder</span>
|
|
||||||
|
|
||||||
<span class="c1"># def set_params(self, **parameters):</span>
|
|
||||||
<span class="c1"># """</span>
|
|
||||||
<span class="c1"># Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported</span>
|
|
||||||
<span class="c1">#</span>
|
|
||||||
<span class="c1"># :param parameters: a `**kwargs` dictionary `{'C': <float>}`</span>
|
|
||||||
<span class="c1"># """</span>
|
|
||||||
<span class="c1"># assert sorted(list(parameters.keys())) == ['C', 'loss'], \</span>
|
|
||||||
<span class="c1"># 'currently, only the C and loss parameters are supported'</span>
|
|
||||||
<span class="c1"># self.C = parameters.get('C', self.C)</span>
|
|
||||||
<span class="c1"># self.loss = parameters.get('loss', self.loss)</span>
|
|
||||||
<span class="c1">#</span>
|
|
||||||
<span class="c1"># def get_params(self, deep=True):</span>
|
|
||||||
<span class="c1"># return {'C': self.C, 'loss': self.loss}</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="SVMperf.fit">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.fit">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Trains the SVM for the multivariate performance loss</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: training instances</span>
|
|
||||||
<span class="sd"> :param y: a binary vector of labels</span>
|
|
||||||
<span class="sd"> :return: `self`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="ow">in</span> <span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'unsupported loss </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="si">}</span><span class="s1">, valid ones are </span><span class="si">{</span><span class="nb">list</span><span class="p">(</span><span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">'svm_perf_learn'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">'svm_perf_classify'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span> <span class="o">=</span> <span class="s1">'-w 3 -l '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span> <span class="o">=</span> <span class="s1">'-c '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">C</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes_</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
|
||||||
<span class="c1"># this would allow to run parallel instances of predict</span>
|
|
||||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'svmperfprocess'</span><span class="o">+</span><span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="c1"># tmp dir are removed after the fit terminates in multiprocessing...</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">suffix</span><span class="o">=</span><span class="n">random_code</span><span class="p">)</span><span class="o">.</span><span class="n">name</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span><span class="p">,</span> <span class="s1">'.'</span> <span class="o">+</span> <span class="n">random_code</span><span class="p">)</span>
|
|
||||||
<span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'model-'</span><span class="o">+</span><span class="n">random_code</span><span class="p">)</span>
|
|
||||||
<span class="n">traindat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'train-</span><span class="si">{</span><span class="n">random_code</span><span class="si">}</span><span class="s1">.dat'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Running]'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">):</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
|
||||||
<span class="n">remove</span><span class="p">(</span><span class="n">traindat</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="SVMperf.predict">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.predict">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Predicts labels for the instances `X`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
|
||||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
|
||||||
<span class="sd"> instances in `X`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">confidence_scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">decision_function</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="p">(</span><span class="n">confidence_scores</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="o">*</span> <span class="mi">1</span>
|
|
||||||
<span class="k">return</span> <span class="n">predictions</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="SVMperf.decision_function">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.decision_function">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">decision_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Evaluate the decision function for the samples in `X`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify</span>
|
|
||||||
<span class="sd"> :param y: unused</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_samples,)` containing the decision scores of the instances</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'tmpdir'</span><span class="p">),</span> <span class="s1">'predict called before fit'</span>
|
|
||||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'model directory corrupted'</span>
|
|
||||||
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">),</span> <span class="s1">'model not found'</span>
|
|
||||||
<span class="k">if</span> <span class="n">y</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="c1"># in order to allow for parallel runs of predict, a random code is assigned</span>
|
|
||||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
|
||||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
|
||||||
<span class="n">predictions_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'predictions'</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">'.dat'</span><span class="p">)</span>
|
|
||||||
<span class="n">testdat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'test'</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">'.dat'</span><span class="p">)</span>
|
|
||||||
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions_path</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Running]'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">loadtxt</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
|
|
||||||
<span class="n">remove</span><span class="p">(</span><span class="n">testdat</span><span class="p">)</span>
|
|
||||||
<span class="n">remove</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">scores</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__del__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'tmpdir'</span><span class="p">):</span>
|
|
||||||
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,165 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data._ifcb — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data._ifcb</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data._ifcb</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IFCBTrainSamplesFromDir">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">classes</span><span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes</span> <span class="o">=</span> <span class="n">classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">path_dir</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">filename</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'.csv'</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
|
|
||||||
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">sample</span><span class="p">))</span>
|
|
||||||
<span class="c1"># all columns but the first where we get the class</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
|
||||||
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IFCBTrainSamplesFromDir.total">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir.total">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: The number of training samples to generate.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IFCBTestSamples">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">IFCBTestSamples</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path_dir</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="n">_</span><span class="p">,</span> <span class="n">test_sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
|
||||||
<span class="c1">#Load the sample from disk</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">test_sample</span><span class="p">[</span><span class="s1">'sample'</span><span class="p">]</span><span class="o">+</span><span class="s1">'.csv'</span><span class="p">))</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">test_sample</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">prevalences</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IFCBTestSamples.total">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples.total">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: The number of test samples to generate.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">index</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,307 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data._lequa2022 — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data._lequa2022</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data._lequa2022</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Union</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
|
|
||||||
|
|
||||||
<span class="n">DEV_SAMPLES</span> <span class="o">=</span> <span class="mi">1000</span>
|
|
||||||
<span class="n">TEST_SAMPLES</span> <span class="o">=</span> <span class="mi">5000</span>
|
|
||||||
|
|
||||||
<span class="n">ERROR_TOL</span> <span class="o">=</span> <span class="mf">1E-3</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="load_category_map">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_category_map">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">load_category_map</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="n">cat2code</span> <span class="o">=</span> <span class="p">{}</span>
|
|
||||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
|
|
||||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">fin</span><span class="p">:</span>
|
|
||||||
<span class="n">category</span><span class="p">,</span> <span class="n">code</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
|
||||||
<span class="n">cat2code</span><span class="p">[</span><span class="n">category</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">code</span><span class="p">)</span>
|
|
||||||
<span class="n">code2cat</span> <span class="o">=</span> <span class="p">[</span><span class="n">cat</span> <span class="k">for</span> <span class="n">cat</span><span class="p">,</span> <span class="n">code</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">cat2code</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">])]</span>
|
|
||||||
<span class="k">return</span> <span class="n">cat2code</span><span class="p">,</span> <span class="n">code2cat</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="load_raw_documents">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_raw_documents">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">load_raw_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
|
||||||
<span class="n">documents</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s2">"text"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
|
||||||
<span class="n">labels</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="s2">"label"</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
|
||||||
<span class="n">labels</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s2">"label"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">documents</span><span class="p">,</span> <span class="n">labels</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="load_vector_documents">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_vector_documents">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">load_vector_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="n">D</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="n">labelled</span> <span class="o">=</span> <span class="n">D</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">301</span>
|
|
||||||
<span class="k">if</span> <span class="n">labelled</span><span class="p">:</span>
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">,</span> <span class="kc">None</span>
|
|
||||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="SamplesFromDir">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.SamplesFromDir">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">SamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">ground_truth_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">load_fn</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_fn</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">ground_truth_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="nb">id</span><span class="p">,</span> <span class="n">prevalence</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
|
||||||
<span class="n">sample</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">.txt'</span><span class="p">))</span>
|
|
||||||
<span class="k">yield</span> <span class="n">sample</span><span class="p">,</span> <span class="n">prevalence</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">ResultSubmission</span><span class="p">:</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__init_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">categories</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">categories</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">or</span> <span class="n">categories</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'wrong format for categories: an int (>=2) was expected'</span><span class="p">)</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">categories</span><span class="p">)))</span>
|
|
||||||
<span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">set_names</span><span class="p">(</span><span class="s1">'id'</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">n_categories</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.add">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.add">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">prevalence_values</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_id</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: expected int for sample_sample, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sample_id</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: expected np.ndarray for prevalence_values, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">__init_df</span><span class="p">(</span><span class="n">categories</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">))</span>
|
|
||||||
<span class="k">if</span> <span class="n">sample_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">" already added'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">size</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_categories</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: wrong shape found for prevalence vector </span><span class="si">{</span><span class="n">prevalence_values</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o">></span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values out of range [0,1] for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">"'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevalence_values</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">></span> <span class="n">ERROR_TOL</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values do not sum up to one for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">"'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'(error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">prevalence_values</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.load">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.load">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s1">'ResultSubmission'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_file_format</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
|
||||||
<span class="n">r</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="p">()</span>
|
|
||||||
<span class="n">r</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
|
|
||||||
<span class="k">return</span> <span class="n">r</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.dump">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.dump">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">dump</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.prevalence">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.prevalence">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="n">sel</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span>
|
|
||||||
<span class="k">if</span> <span class="n">sel</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="kc">None</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">sel</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.iterrows">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.iterrows">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">iterrows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">row</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
|
||||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="k">yield</span> <span class="n">index</span><span class="p">,</span> <span class="n">prevalence</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.check_file_format">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_file_format">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">check_file_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the file </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1"> does not seem to be a valid csv file. '</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ResultSubmission.check_dataframe_format">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_dataframe_format">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">check_dataframe_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
|
|
||||||
<span class="n">hint_path</span> <span class="o">=</span> <span class="s1">''</span> <span class="c1"># if given, show the data path in the error message</span>
|
|
||||||
<span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">hint_path</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">' in </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span> <span class="o">!=</span> <span class="s1">'id'</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'the format of the header should be "id,0,...,n-1", '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'where n is the number of categories'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">ci</span><span class="p">)</span> <span class="k">for</span> <span class="n">ci</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">]</span> <span class="o">!=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">))):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, category ids should be 0,1,2,...,n-1, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'where n is the number of categories'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: results file is empty'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">DEV_SAMPLES</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">TEST_SAMPLES</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong number of prevalence values found</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">; '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'expected </span><span class="si">{</span><span class="n">DEV_SAMPLES</span><span class="si">}</span><span class="s1"> for development sets and '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">TEST_SAMPLES</span><span class="si">}</span><span class="s1"> for test sets; found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
|
||||||
<span class="n">expected_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)))</span>
|
|
||||||
<span class="k">if</span> <span class="n">ids</span> <span class="o">!=</span> <span class="n">expected_ids</span><span class="p">:</span>
|
|
||||||
<span class="n">missing</span> <span class="o">=</span> <span class="n">expected_ids</span> <span class="o">-</span> <span class="n">ids</span>
|
|
||||||
<span class="k">if</span> <span class="n">missing</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> missing ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="n">unexpected</span> <span class="o">=</span> <span class="n">ids</span> <span class="o">-</span> <span class="n">expected_ids</span>
|
|
||||||
<span class="k">if</span> <span class="n">unexpected</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> unexpected ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">unexpected</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">for</span> <span class="n">category_id</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1"> column "</span><span class="si">{</span><span class="n">category_id</span><span class="si">}</span><span class="s1">" contains values out of range [0,1]'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">round_errors</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span><span class="p">)</span> <span class="o">></span> <span class="n">ERROR_TOL</span>
|
|
||||||
<span class="k">if</span> <span class="n">round_errors</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'warning: prevalence values in rows with id </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">round_errors</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'do not sum up to 1 (error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">), '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'probably due to some rounding errors.'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">df</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,728 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data.base</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data.base</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">cached_property</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Iterable</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">issparse</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">vstack</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span><span class="p">,</span> <span class="n">RepeatedStratifiedKFold</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">numpy.random</span> <span class="kn">import</span> <span class="n">RandomState</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">temp_seed</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">LabelledCollection</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A LabelledCollection is a set of objects each with a label attached to each of them. </span>
|
|
||||||
<span class="sd"> This class implements several sampling routines and other utilities.</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> :param instances: array-like (np.ndarray, list, or csr_matrix are supported)</span>
|
|
||||||
<span class="sd"> :param labels: array-like with the same length of instances</span>
|
|
||||||
<span class="sd"> :param classes: optional, list of classes from which labels are taken. If not specified, the classes are inferred</span>
|
|
||||||
<span class="sd"> from the labels. The classes must be indicated in cases in which some of the labels might have no examples</span>
|
|
||||||
<span class="sd"> (i.e., a prevalence of 0)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="c1"># lists of strings occupy too much as ndarrays (although python-objects add a heavy overload)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span>
|
|
||||||
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">classes</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">classes</span><span class="p">))</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">difference</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)))</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'labels (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="si">}</span><span class="s1">) contain values not included in classes_ (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n_docs</span><span class="p">)[</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.load">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.load">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a labelled set of data and convert it into a :class:`LabelledCollection` instance. The function in charge</span>
|
|
||||||
<span class="sd"> of reading the instances must be specified. This function can be a custom one, or any of the reading functions</span>
|
|
||||||
<span class="sd"> defined in :mod:`quapy.data.reader` module.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: string, the path to the file containing the labelled instances</span>
|
|
||||||
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
|
|
||||||
<span class="sd"> labels</span>
|
|
||||||
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
|
|
||||||
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances, i.e.,</span>
|
|
||||||
<span class="sd"> these arguments are used to call `loader_func(path, **loader_kwargs)`</span>
|
|
||||||
<span class="sd"> :return: a :class:`LabelledCollection` object</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="o">*</span><span class="n">loader_func</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">),</span> <span class="n">classes</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the length of this collection (number of labelled instances)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.prevalence">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.prevalence">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the prevalence, or relative frequency, of the classes in the codeframe.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the relative frequencies of each class, in the same order</span>
|
|
||||||
<span class="sd"> as listed by `self.classes_`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts</span><span class="p">()</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.counts">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.counts">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">counts</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of instances for each of the classes in the codeframe.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the number of instances of each class, in the same order</span>
|
|
||||||
<span class="sd"> as listed by `self.classes_`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">])</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The number of classes</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns True if the number of classes is 2</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: boolean</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.sampling_index">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_index">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the</span>
|
|
||||||
<span class="sd"> prevalence values are not specified, then returns the index of a uniform sampling.</span>
|
|
||||||
<span class="sd"> For each class, the sampling is drawn with replacement if the requested prevalence is larger than</span>
|
|
||||||
<span class="sd"> the actual prevalence of the class, or without replacement otherwise.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param size: integer, the requested size</span>
|
|
||||||
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be lead empty since</span>
|
|
||||||
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
|
|
||||||
<span class="sd"> `self.classes_` can be specified, while the other class takes prevalence value `1-p`</span>
|
|
||||||
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
|
|
||||||
<span class="sd"> :param random_state: seed for reproducing sampling</span>
|
|
||||||
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># no prevalence was indicated; returns an index for uniform sampling</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">),)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="s1">'unexpected number of prevalences'</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'prevalences (</span><span class="si">{</span><span class="n">prevs</span><span class="si">}</span><span class="s1">) wrong range (sum=</span><span class="si">{</span><span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
|
||||||
|
|
||||||
<span class="c1"># Decide how many instances should be taken for each class in order to satisfy the requested prevalence</span>
|
|
||||||
<span class="c1"># accurately, and the number of instances in the sample (exactly). If int(size * prevs[i]) (which is</span>
|
|
||||||
<span class="c1"># <= size * prevs[i]) examples are drawn from class i, there could be a remainder number of instances to take</span>
|
|
||||||
<span class="c1"># to satisfy the size constraint. The remainder is distributed along the classes with probability = prevs.</span>
|
|
||||||
<span class="c1"># (This aims at avoiding the remainder to be placed in a class for which the prevalence requested is 0.)</span>
|
|
||||||
<span class="n">n_requests</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="nb">round</span><span class="p">(</span><span class="n">size</span> <span class="o">*</span> <span class="n">prevs</span><span class="p">[</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">class_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)}</span>
|
|
||||||
<span class="n">remainder</span> <span class="o">=</span> <span class="n">size</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">n_requests</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
|
||||||
<span class="k">with</span> <span class="n">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="c1"># due to rounding, the remainder can be 0, >0, or <0</span>
|
|
||||||
<span class="k">if</span> <span class="n">remainder</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="c1"># when the remainder is >0 we randomly add 1 to the requests for each class;</span>
|
|
||||||
<span class="c1"># more prevalent classes are more likely to be taken in order to minimize the impact in the final prevalence</span>
|
|
||||||
<span class="k">for</span> <span class="n">rand_class</span> <span class="ow">in</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">remainder</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">elif</span> <span class="n">remainder</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="c1"># when the remainder is <0 we randomly remove 1 from the requests, unless the request is 0 for a chosen</span>
|
|
||||||
<span class="c1"># class; we repeat until remainder==0</span>
|
|
||||||
<span class="k">while</span> <span class="n">remainder</span><span class="o">!=</span><span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">rand_class</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">-=</span> <span class="mi">1</span>
|
|
||||||
<span class="n">remainder</span> <span class="o">+=</span> <span class="mi">1</span>
|
|
||||||
|
|
||||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">class_</span><span class="p">,</span> <span class="n">n_requested</span> <span class="ow">in</span> <span class="n">n_requests</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
|
||||||
<span class="n">n_candidates</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span>
|
|
||||||
<span class="n">index_sample</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">][</span>
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">n_candidates</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">n_requested</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="p">(</span><span class="n">n_requested</span> <span class="o">></span> <span class="n">n_candidates</span><span class="p">))</span>
|
|
||||||
<span class="p">]</span> <span class="k">if</span> <span class="n">n_requested</span> <span class="o">></span> <span class="mi">0</span> <span class="k">else</span> <span class="p">[]</span>
|
|
||||||
|
|
||||||
<span class="n">indexes_sample</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_sample</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">shuffle</span><span class="p">:</span>
|
|
||||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">indexes_sample</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.uniform_sampling_index">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling_index">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn</span>
|
|
||||||
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
|
|
||||||
<span class="sd"> otherwise.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param size: integer, the size of the uniform sample</span>
|
|
||||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
|
||||||
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">ng</span> <span class="o">=</span> <span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">ng</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span>
|
|
||||||
<span class="k">return</span> <span class="n">ng</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="n">size</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="n">size</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.sampling">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence</span>
|
|
||||||
<span class="sd"> values. For each class, the sampling is drawn with replacement if the number of instances requested for</span>
|
|
||||||
<span class="sd"> the class exceeds the number of instances available for it, or without replacement otherwise.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param size: integer, the requested size</span>
|
|
||||||
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since</span>
|
|
||||||
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
|
|
||||||
<span class="sd"> `self.classes_`) can be specified, while the other class takes prevalence value `1-p`</span>
|
|
||||||
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
|
|
||||||
<span class="sd"> :param random_state: seed for reproducing sampling</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size` and prevalence close to `prevs` (or</span>
|
|
||||||
<span class="sd"> prevalence == `prevs` if the exact prevalence values can be met as proportions of instances)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">prev_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">prev_index</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.uniform_sampling">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">uniform_sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn</span>
|
|
||||||
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
|
|
||||||
<span class="sd"> otherwise.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param size: integer, the requested size</span>
|
|
||||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">unif_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">unif_index</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.sampling_from_index">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_from_index">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">sampling_from_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns an instance of :class:`LabelledCollection` whose elements are sampled from this collection using the</span>
|
|
||||||
<span class="sd"> index.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param index: np.ndarray</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">documents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
|
||||||
<span class="n">labels</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.split_stratified">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_stratified">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">split_stratified</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns two instances of :class:`LabelledCollection` split with stratification from this collection, at desired</span>
|
|
||||||
<span class="sd"> proportion.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
|
|
||||||
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
|
|
||||||
<span class="sd"> (typically used as a test collection).</span>
|
|
||||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
|
||||||
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
|
|
||||||
<span class="sd"> second one with `1-train_prop` elements</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">tr_docs</span><span class="p">,</span> <span class="n">te_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">te_labels</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="n">train_prop</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">tr_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">te_docs</span><span class="p">,</span> <span class="n">te_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.split_random">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_random">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">split_random</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired</span>
|
|
||||||
<span class="sd"> proportion.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
|
|
||||||
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
|
|
||||||
<span class="sd"> (typically used as a test collection).</span>
|
|
||||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
|
||||||
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
|
|
||||||
<span class="sd"> second one with `1-train_prop` elements</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="n">train_prop</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> \
|
|
||||||
<span class="s1">'argument train_prop cannot be greater than the number of elements in the collection'</span>
|
|
||||||
<span class="n">splitpoint</span> <span class="o">=</span> <span class="n">train_prop</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">train_prop</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> \
|
|
||||||
<span class="s1">'argument train_prop out of range (0,1)'</span>
|
|
||||||
<span class="n">splitpoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">*</span><span class="n">train_prop</span><span class="p">))</span>
|
|
||||||
<span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">indexes</span><span class="p">[:</span><span class="n">splitpoint</span><span class="p">],</span> <span class="n">indexes</span><span class="p">[</span><span class="n">splitpoint</span><span class="p">:]</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">left</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">right</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of this collection with another collection.</span>
|
|
||||||
<span class="sd"> Both labelled collections must have the same classes.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param other: another :class:`LabelledCollection`</span>
|
|
||||||
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of both collections</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">==</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="p">)):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unsupported operation for collections on different classes; '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.join">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.join">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="s1">'LabelledCollection'</span><span class="p">]):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of the collections given in input.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param args: instances of :class:`LabelledCollection`</span>
|
|
||||||
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of all the given collections</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">args</span> <span class="o">=</span> <span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="n">lc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">]</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'empty list is not allowed for mix'</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">lc</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">)</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]),</span> \
|
|
||||||
<span class="s1">'only instances of LabelledCollection allowed'</span>
|
|
||||||
|
|
||||||
<span class="n">first_instances</span> <span class="o">=</span> <span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">instances</span>
|
|
||||||
<span class="n">first_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">type</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span><span class="o">==</span><span class="n">first_type</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
|
||||||
<span class="s1">'not all the collections are of instances of the same type'</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="n">first_ndim</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">ndim</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="n">first_ndim</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
|
||||||
<span class="s1">'not all the ndarrays are of the same dimension'</span>
|
|
||||||
<span class="k">if</span> <span class="n">first_ndim</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">first_shape</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="o">==</span> <span class="n">first_shape</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
|
||||||
<span class="s1">'not all the ndarrays are of the same shape'</span>
|
|
||||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">):</span>
|
|
||||||
<span class="n">instances</span> <span class="o">=</span> <span class="n">vstack</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
|
||||||
<span class="n">instances</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">))</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'unsupported operation for collection types'</span><span class="p">)</span>
|
|
||||||
<span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">labels</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
|
||||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">Xy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Gets the instances and labels. This is useful when working with `sklearn` estimators, e.g.:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> svm = LinearSVC().fit(*my_collection.Xy)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a tuple `(instances, labels)` from this collection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">Xp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from</span>
|
|
||||||
<span class="sd"> a :class:`LabelledCollection` object.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a tuple `(instances, prevalence)` from this collection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">X</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An alias to self.instances</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: self.instances</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">y</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An alias to self.labels</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: self.labels</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">p</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An alias to self.prevalence()</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: self.prevalence()</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.stats">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.stats">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns (and eventually prints) a dictionary with some stats of this collection. E.g.,:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)</span>
|
|
||||||
<span class="sd"> >>> data.training.stats()</span>
|
|
||||||
<span class="sd"> >>> #instances=3821, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
|
|
||||||
<span class="sd"> :return: a dictionary containing some stats of this collection. Keys include `#instances` (the number of</span>
|
|
||||||
<span class="sd"> instances), `type` (the type representing the instances), `#features` (the number of features, if the</span>
|
|
||||||
<span class="sd"> instances are in array-like format), `#classes` (the classes of the collection), `prevs` (the prevalence</span>
|
|
||||||
<span class="sd"> values for each class)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">ninstances</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
|
||||||
<span class="n">instance_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
|
|
||||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
|
||||||
<span class="k">elif</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span> <span class="ow">or</span> <span class="n">issparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="s1">'?'</span>
|
|
||||||
<span class="n">stats_</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'instances'</span><span class="p">:</span> <span class="n">ninstances</span><span class="p">,</span>
|
|
||||||
<span class="s1">'type'</span><span class="p">:</span> <span class="n">instance_type</span><span class="p">,</span>
|
|
||||||
<span class="s1">'features'</span><span class="p">:</span> <span class="n">nfeats</span><span class="p">,</span>
|
|
||||||
<span class="s1">'classes'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span>
|
|
||||||
<span class="s1">'prevs'</span><span class="p">:</span> <span class="n">strprev</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())}</span>
|
|
||||||
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'#instances=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, type=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"features"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'#classes=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"classes"</span><span class="p">]</span><span class="si">}</span><span class="s1">, prevs=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">stats_</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelledCollection.kFCV">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.kFCV">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
|
|
||||||
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
|
|
||||||
<span class="sd"> :param random_state: integer (default 0), guarantees that the folds generated are reproducible</span>
|
|
||||||
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">kf</span> <span class="o">=</span> <span class="n">RepeatedStratifiedKFold</span><span class="p">(</span><span class="n">n_splits</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">train_index</span><span class="p">,</span> <span class="n">test_index</span> <span class="ow">in</span> <span class="n">kf</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">Xy</span><span class="p">):</span>
|
|
||||||
<span class="n">train</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">train_index</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">test_index</span><span class="p">)</span>
|
|
||||||
<span class="k">yield</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">Dataset</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstraction of training and test :class:`LabelledCollection` objects.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param training: a :class:`LabelledCollection` instance</span>
|
|
||||||
<span class="sd"> :param test: a :class:`LabelledCollection` instance</span>
|
|
||||||
<span class="sd"> :param vocabulary: if indicated, is a dictionary of the terms used in this textual dataset</span>
|
|
||||||
<span class="sd"> :param name: a string representing the name of the dataset</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">vocabulary</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">set</span><span class="p">(</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span> <span class="o">==</span> <span class="nb">set</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">'incompatible labels in training and test collections'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">training</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">test</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vocabulary</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset.SplitStratified">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.SplitStratified">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">SplitStratified</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">collection</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="mf">0.6</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates a :class:`Dataset` from a stratified split of a :class:`LabelledCollection` instance.</span>
|
|
||||||
<span class="sd"> See :meth:`LabelledCollection.split_stratified`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param collection: :class:`LabelledCollection`</span>
|
|
||||||
<span class="sd"> :param train_size: the proportion of training documents (the rest conforms the test split)</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Dataset`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">collection</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="n">train_size</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The classes according to which the training collection is labelled</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: The classes according to which the training collection is labelled</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The number of classes according to which the training collection is labelled</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">n_classes</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns True if the training collection is labelled according to two classes</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: boolean</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">binary</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset.load">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.load">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a training and a test labelled set of data and convert it into a :class:`Dataset` instance.</span>
|
|
||||||
<span class="sd"> The function in charge of reading the instances must be specified. This function can be a custom one, or any of</span>
|
|
||||||
<span class="sd"> the reading functions defined in :mod:`quapy.data.reader` module.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param train_path: string, the path to the file containing the training instances</span>
|
|
||||||
<span class="sd"> :param test_path: string, the path to the file containing the test instances</span>
|
|
||||||
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
|
|
||||||
<span class="sd"> labels</span>
|
|
||||||
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
|
|
||||||
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances.</span>
|
|
||||||
<span class="sd"> See :meth:`LabelledCollection.load` for further details.</span>
|
|
||||||
<span class="sd"> :return: a :class:`Dataset` object</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">train_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">train_test</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Alias to `self.training` and `self.test`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: the training and test collections</span>
|
|
||||||
<span class="sd"> :return: the training and test collections</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset.stats">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.stats">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns (and eventually prints) a dictionary with some stats of this dataset. E.g.,:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)</span>
|
|
||||||
<span class="sd"> >>> data.stats()</span>
|
|
||||||
<span class="sd"> >>> Dataset=kindle #tr-instances=3821, #te-instances=21591, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
|
|
||||||
<span class="sd"> :return: a dictionary containing some stats of this collection for the training and test collections. The keys</span>
|
|
||||||
<span class="sd"> are `train` and `test`, and point to dedicated dictionaries of stats, for each collection, with keys</span>
|
|
||||||
<span class="sd"> `#instances` (the number of instances), `type` (the type representing the instances),</span>
|
|
||||||
<span class="sd"> `#features` (the number of features, if the instances are in array-like format), `#classes` (the classes of</span>
|
|
||||||
<span class="sd"> the collection), `prevs` (the prevalence values for each class)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">tr_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">te_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s1"> #tr-instances=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #te-instances=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'type=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"features"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #classes=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"classes"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'tr-prevs=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">, te-prevs=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="p">{</span><span class="s1">'train'</span><span class="p">:</span> <span class="n">tr_stats</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">:</span> <span class="n">te_stats</span><span class="p">}</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset.kFCV">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.kFCV">[docs]</a>
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around</span>
|
|
||||||
<span class="sd"> :meth:`LabelledCollection.kFCV` that returns :class:`Dataset` instances made of training and test folds.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
|
|
||||||
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
|
|
||||||
<span class="sd"> :param random_state: integer (default 0), guarantees that the folds generated are reproducible</span>
|
|
||||||
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation as instances of :class:`Dataset`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)):</span>
|
|
||||||
<span class="k">yield</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="sa">f</span><span class="s1">'fold </span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">nfolds</span><span class="si">}</span><span class="s1"> (round=</span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">//</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Dataset.reduce">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.reduce">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">reduce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_train</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_test</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_train: number of training documents to keep (default 100)</span>
|
|
||||||
<span class="sd"> :param n_test: number of test documents to keep (default 100)</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_train</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_test</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,919 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data.datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data.datasets</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data.datasets</h1><div class="highlight"><pre>
|
|
||||||
<div class="viewcode-block" id="warn"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.warn">[docs]</a><span></span><span class="k">def</span> <span class="nf">warn</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="k">pass</span></div>
|
|
||||||
<span class="kn">import</span> <span class="nn">warnings</span>
|
|
||||||
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span> <span class="o">=</span> <span class="n">warn</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">zipfile</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">ucimlrepo</span> <span class="kn">import</span> <span class="n">fetch_ucirepo</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.preprocessing</span> <span class="kn">import</span> <span class="n">text2tfidf</span><span class="p">,</span> <span class="n">reduce_columns</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.reader</span> <span class="kn">import</span> <span class="o">*</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">download_file_if_not_exists</span><span class="p">,</span> <span class="n">download_file</span><span class="p">,</span> <span class="n">get_quapy_home</span><span class="p">,</span> <span class="n">pickled_resource</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="n">REVIEWS_SENTIMENT_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'hp'</span><span class="p">,</span> <span class="s1">'kindle'</span><span class="p">,</span> <span class="s1">'imdb'</span><span class="p">]</span>
|
|
||||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'gasp'</span><span class="p">,</span> <span class="s1">'hcr'</span><span class="p">,</span> <span class="s1">'omd'</span><span class="p">,</span> <span class="s1">'sanders'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'semeval13'</span><span class="p">,</span> <span class="s1">'semeval14'</span><span class="p">,</span> <span class="s1">'semeval15'</span><span class="p">,</span> <span class="s1">'semeval16'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'sst'</span><span class="p">,</span> <span class="s1">'wa'</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">]</span>
|
|
||||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'gasp'</span><span class="p">,</span> <span class="s1">'hcr'</span><span class="p">,</span> <span class="s1">'omd'</span><span class="p">,</span> <span class="s1">'sanders'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'semeval'</span><span class="p">,</span> <span class="s1">'semeval16'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'sst'</span><span class="p">,</span> <span class="s1">'wa'</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">]</span>
|
|
||||||
<span class="n">UCI_BINARY_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'acute.a'</span><span class="p">,</span> <span class="s1">'acute.b'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.1'</span><span class="p">,</span> <span class="s1">'balance.2'</span><span class="p">,</span> <span class="s1">'balance.3'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'breast-cancer'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.1'</span><span class="p">,</span> <span class="s1">'cmc.2'</span><span class="p">,</span> <span class="s1">'cmc.3'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.1'</span><span class="p">,</span> <span class="s1">'ctg.2'</span><span class="p">,</span> <span class="s1">'ctg.3'</span><span class="p">,</span>
|
|
||||||
<span class="c1">#'diabetes', # <-- I haven't found this one...</span>
|
|
||||||
<span class="s1">'german'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'haberman'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ionosphere'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.1'</span><span class="p">,</span> <span class="s1">'iris.2'</span><span class="p">,</span> <span class="s1">'iris.3'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'mammographic'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'pageblocks.5'</span><span class="p">,</span>
|
|
||||||
<span class="c1">#'phoneme', # <-- I haven't found this one...</span>
|
|
||||||
<span class="s1">'semeion'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'sonar'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spambase'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spectf'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'tictactoe'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'transfusion'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wdbc'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.1'</span><span class="p">,</span> <span class="s1">'wine.2'</span><span class="p">,</span> <span class="s1">'wine.3'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-q-red'</span><span class="p">,</span> <span class="s1">'wine-q-white'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'yeast'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">UCI_MULTICLASS_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'dry-bean'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-quality'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'academic-success'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'digits'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'letter'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">LEQUA2022_TASKS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'T1A'</span><span class="p">,</span> <span class="s1">'T1B'</span><span class="p">,</span> <span class="s1">'T2A'</span><span class="p">,</span> <span class="s1">'T2B'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">_TXA_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">250</span>
|
|
||||||
<span class="n">_TXB_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
|
|
||||||
|
|
||||||
<span class="n">LEQUA2022_SAMPLE_SIZE</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'TXA'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'TXB'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'T1A'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'T1B'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'T2A'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'T2B'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'binary'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
|
||||||
<span class="s1">'multiclass'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_reviews"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_reviews">[docs]</a><span class="k">def</span> <span class="nf">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a Reviews dataset as a Dataset instance, as used in</span>
|
|
||||||
<span class="sd"> `Esuli, A., Moreo, A., and Sebastiani, F. "A recurrent neural network for sentiment quantification."</span>
|
|
||||||
<span class="sd"> Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018. <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_.</span>
|
|
||||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are 'hp', 'kindle', 'imdb'</span>
|
|
||||||
<span class="sd"> :param tfidf: set to True to transform the raw documents into tfidf weighted matrices</span>
|
|
||||||
<span class="sd"> :param min_df: minimum number of documents that should contain a term in order for the term to be</span>
|
|
||||||
<span class="sd"> kept (ignored if tfidf==False)</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
|
|
||||||
<span class="sd"> faster subsequent invocations</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment reviews. '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">URL_TRAIN</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_train.txt'</span>
|
|
||||||
<span class="n">URL_TEST</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_test.txt'</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">train_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">'train.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">test_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">'test.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TRAIN</span><span class="p">,</span> <span class="n">train_path</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TEST</span><span class="p">,</span> <span class="n">test_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
|
|
||||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="s1">'pickle'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">.pkl'</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">from_text</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">tfidf</span><span class="p">:</span>
|
|
||||||
<span class="n">text2tfidf</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">data</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_twitter"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_twitter">[docs]</a><span class="k">def</span> <span class="nf">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a Twitter dataset as a :class:`quapy.data.base.Dataset` instance, as used in:</span>
|
|
||||||
<span class="sd"> `Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.</span>
|
|
||||||
<span class="sd"> Social Network Analysis and Mining 6(19), 1–22 (2016) <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_</span>
|
|
||||||
<span class="sd"> Note that the datasets 'semeval13', 'semeval14', 'semeval15' share the same training set.</span>
|
|
||||||
<span class="sd"> The list of valid dataset names corresponding to training sets can be accessed in</span>
|
|
||||||
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN`, while the test sets can be accessed in</span>
|
|
||||||
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are 'gasp', 'hcr', 'omd', 'sanders', 'semeval13',</span>
|
|
||||||
<span class="sd"> 'semeval14', 'semeval15', 'semeval16', 'sst', 'wa', 'wb'</span>
|
|
||||||
<span class="sd"> :param for_model_selection: if True, then returns the train split as the training set and the devel split</span>
|
|
||||||
<span class="sd"> as the test set; if False, then returns the train+devel split as the training set and the test set as the</span>
|
|
||||||
<span class="sd"> test set</span>
|
|
||||||
<span class="sd"> :param min_df: minimum number of documents that should contain a term in order for the term to be kept</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
|
|
||||||
<span class="sd"> faster subsequent invocations</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment twitter. '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="si">}</span><span class="s1"> for model selection and '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="si">}</span><span class="s1"> for test (datasets "semeval14", "semeval15", "semeval16" share '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'a common training set "semeval")'</span>
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">URL</span> <span class="o">=</span> <span class="s1">'https://zenodo.org/record/4255764/files/tweet_sentiment_quantification_snam.zip'</span>
|
|
||||||
<span class="n">unzipped_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'tweet_sentiment_quantification_snam'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">):</span>
|
|
||||||
<span class="n">downloaded_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'tweet_sentiment_quantification_snam.zip'</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file</span><span class="p">(</span><span class="n">URL</span><span class="p">,</span> <span class="n">downloaded_path</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
|
||||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">data_home</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="p">{</span><span class="s1">'semeval13'</span><span class="p">,</span> <span class="s1">'semeval14'</span><span class="p">,</span> <span class="s1">'semeval15'</span><span class="p">}:</span>
|
|
||||||
<span class="n">trainset_name</span> <span class="o">=</span> <span class="s1">'semeval'</span>
|
|
||||||
<span class="n">testset_name</span> <span class="o">=</span> <span class="s1">'semeval'</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="n">dataset_name</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "</span>
|
|
||||||
<span class="sa">f</span><span class="s2">"(called 'semeval'); returning trainin-set='</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s2">' and test-set=</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval'</span> <span class="ow">and</span> <span class="n">for_model_selection</span><span class="o">==</span><span class="kc">False</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'dataset "semeval" can only be used for model selection. '</span>
|
|
||||||
<span class="s1">'Use "semeval13", "semeval14", or "semeval15" for model evaluation.'</span><span class="p">)</span>
|
|
||||||
<span class="n">trainset_name</span> <span class="o">=</span> <span class="n">testset_name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
|
|
||||||
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'train'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train.feature.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev.feature.txt'</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'train'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train+dev.feature.txt'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval16'</span><span class="p">:</span> <span class="c1"># there is a different test name in the case of semeval16 only</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev-test.feature.txt'</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.test.feature.txt'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
|
|
||||||
<span class="n">mode</span> <span class="o">=</span> <span class="s2">"train-dev"</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="s2">"train+dev-test"</span>
|
|
||||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'pickle'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">mode</span><span class="si">}</span><span class="s1">.pkl'</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">from_sparse</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">data</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_UCIBinaryDataset"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryDataset">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a UCI dataset as an instance of :class:`quapy.data.base.Dataset`, as used in</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).</span>
|
|
||||||
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
|
|
||||||
<span class="sd"> Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
|
||||||
<span class="sd"> and</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).</span>
|
|
||||||
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
|
|
||||||
<span class="sd"> Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
<span class="sd"> The datasets do not come with a predefined train-test split (see :meth:`fetch_UCIBinaryLabelledCollection` for further</span>
|
|
||||||
<span class="sd"> information on how to use these collections), and so a train-test split is generated at desired proportion.</span>
|
|
||||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest forms the training set</span>
|
|
||||||
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_UCIBinaryLabelledCollection"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryLabelledCollection">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">LabelledCollection</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).</span>
|
|
||||||
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
|
|
||||||
<span class="sd"> Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
|
||||||
<span class="sd"> and</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).</span>
|
|
||||||
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
|
|
||||||
<span class="sd"> Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
<span class="sd"> The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation</span>
|
|
||||||
<span class="sd"> protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.</span>
|
|
||||||
<span class="sd"> This can be reproduced by using :meth:`quapy.data.base.Dataset.kFCV`, e.g.:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> import quapy as qp</span>
|
|
||||||
<span class="sd"> >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")</span>
|
|
||||||
<span class="sd"> >>> for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):</span>
|
|
||||||
<span class="sd"> >>> ...</span>
|
|
||||||
|
|
||||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_DATASETS`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
|
|
||||||
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the UCI Machine Learning datasets repository. '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">UCI_BINARY_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">dataset_fullname</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'acute.a'</span><span class="p">:</span> <span class="s1">'Acute Inflammations (urinary bladder)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'acute.b'</span><span class="p">:</span> <span class="s1">'Acute Inflammations (renal pelvis)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.1'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (left)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.2'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (balanced)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.3'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (right)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'breast-cancer'</span><span class="p">:</span> <span class="s1">'Breast Cancer Wisconsin (Original)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.1'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (no use)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.2'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (long term)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.3'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (short term)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.1'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (normal)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.2'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (suspect)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.3'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (pathologic)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'german'</span><span class="p">:</span> <span class="s1">'Statlog German Credit Data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'haberman'</span><span class="p">:</span> <span class="s2">"Haberman's Survival Data"</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ionosphere'</span><span class="p">:</span> <span class="s1">'Johns Hopkins University Ionosphere DB'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.1'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(x)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.2'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(versicolour)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.3'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(virginica)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'mammographic'</span><span class="p">:</span> <span class="s1">'Mammographic Mass'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'pageblocks.5'</span><span class="p">:</span> <span class="s1">'Page Blocks Classification (5)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'semeion'</span><span class="p">:</span> <span class="s1">'Semeion Handwritten Digit (8)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'sonar'</span><span class="p">:</span> <span class="s1">'Sonar, Mines vs. Rocks'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spambase'</span><span class="p">:</span> <span class="s1">'Spambase Data Set'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spectf'</span><span class="p">:</span> <span class="s1">'SPECTF Heart Data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'tictactoe'</span><span class="p">:</span> <span class="s1">'Tic-Tac-Toe Endgame Database'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'transfusion'</span><span class="p">:</span> <span class="s1">'Blood Transfusion Service Center Data Set'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wdbc'</span><span class="p">:</span> <span class="s1">'Wisconsin Diagnostic Breast Cancer'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.1'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (1)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.2'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (2)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.3'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (3)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-q-red'</span><span class="p">:</span> <span class="s1">'Wine Quality Red (6-10)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-q-white'</span><span class="p">:</span> <span class="s1">'Wine Quality White (6-10)'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'yeast'</span><span class="p">:</span> <span class="s1">'Yeast'</span><span class="p">,</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="c1"># the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use</span>
|
|
||||||
<span class="c1"># to download the raw dataset</span>
|
|
||||||
<span class="n">identifier_map</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'acute.a'</span><span class="p">:</span> <span class="s1">'acute'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'acute.b'</span><span class="p">:</span> <span class="s1">'acute'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.1'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.2'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'balance.3'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'breast-cancer'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.1'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.2'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'cmc.3'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.1'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.2'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ctg.3'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'german'</span><span class="p">:</span> <span class="s1">'statlog/german'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'haberman'</span><span class="p">:</span> <span class="s1">'haberman'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ionosphere'</span><span class="p">:</span> <span class="s1">'ionosphere'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.1'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.2'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'iris.3'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'mammographic'</span><span class="p">:</span> <span class="s1">'mammographic-masses'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'pageblocks.5'</span><span class="p">:</span> <span class="s1">'page-blocks'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'semeion'</span><span class="p">:</span> <span class="s1">'semeion'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'sonar'</span><span class="p">:</span> <span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spambase'</span><span class="p">:</span> <span class="s1">'spambase'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spectf'</span><span class="p">:</span> <span class="s1">'spect'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'tictactoe'</span><span class="p">:</span> <span class="s1">'tic-tac-toe'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'transfusion'</span><span class="p">:</span> <span class="s1">'blood-transfusion'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wdbc'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-q-red'</span><span class="p">:</span> <span class="s1">'wine-quality'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-q-white'</span><span class="p">:</span> <span class="s1">'wine-quality'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.1'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.2'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine.3'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'yeast'</span><span class="p">:</span> <span class="s1">'yeast'</span><span class="p">,</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="c1"># the filename is the name of the file within the data_folder indexed by the identifier</span>
|
|
||||||
<span class="n">file_name</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'acute'</span><span class="p">:</span> <span class="s1">'diagnosis.data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'00193'</span><span class="p">:</span> <span class="s1">'CTG.xls'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'statlog/german'</span><span class="p">:</span> <span class="s1">'german.data-numeric'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'mammographic-masses'</span><span class="p">:</span> <span class="s1">'mammographic_masses.data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'page-blocks'</span><span class="p">:</span> <span class="s1">'page-blocks.data.Z'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span> <span class="s1">'sonar.all-data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spect'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'SPECTF.train'</span><span class="p">,</span> <span class="s1">'SPECTF.test'</span><span class="p">],</span>
|
|
||||||
<span class="s1">'blood-transfusion'</span><span class="p">:</span> <span class="s1">'transfusion.data'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-quality'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'winequality-red.csv'</span><span class="p">,</span> <span class="s1">'winequality-white.csv'</span><span class="p">],</span>
|
|
||||||
<span class="s1">'breast-cancer-wisconsin'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin.data'</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'breast-cancer'</span> <span class="k">else</span> <span class="s1">'wdbc.data'</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="c1"># the filename containing the dataset description (if any)</span>
|
|
||||||
<span class="n">desc_name</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'acute'</span><span class="p">:</span> <span class="s1">'diagnosis.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'00193'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="s1">'statlog/german'</span><span class="p">:</span> <span class="s1">'german.doc'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'mammographic-masses'</span><span class="p">:</span> <span class="s1">'mammographic_masses.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span> <span class="s1">'sonar.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'spect'</span><span class="p">:</span> <span class="s1">'SPECTF.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'blood-transfusion'</span><span class="p">:</span> <span class="s1">'transfusion.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'wine-quality'</span><span class="p">:</span> <span class="s1">'winequality.names'</span><span class="p">,</span>
|
|
||||||
<span class="s1">'breast-cancer-wisconsin'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin.names'</span> <span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'breast-cancer'</span> <span class="k">else</span> <span class="s1">'wdbc.names'</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifier_map</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
|
||||||
<span class="n">filename</span> <span class="o">=</span> <span class="n">file_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.data'</span><span class="p">)</span>
|
|
||||||
<span class="n">descfile</span> <span class="o">=</span> <span class="n">desc_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.names'</span><span class="p">)</span>
|
|
||||||
<span class="n">fullname</span> <span class="o">=</span> <span class="n">dataset_fullname</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">URL</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'http://archive.ics.uci.edu/ml/machine-learning-databases/</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="n">data_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'uci_datasets'</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> <span class="c1"># filename could be a list of files, in which case it will be processed later</span>
|
|
||||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">descfile</span><span class="p">:</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
|
|
||||||
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'could not read the description file'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'no file description available'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Loading </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'acute'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-16'</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">float</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)))</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="p">[</span><span class="n">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">)]</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'acute.a'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">6</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'yes'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'acute.b'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">7</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'yes'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'balance-scale'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.1'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'L'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.2'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'B'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.3'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'R'</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'breast-cancer-wisconsin'</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'breast-cancer'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span>
|
|
||||||
<span class="n">Xy</span><span class="p">[</span><span class="n">Xy</span><span class="o">==</span><span class="s1">'?'</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span>
|
|
||||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'breast-cancer-wisconsin'</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'wdbc'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">:</span><span class="mi">32</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'M'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'cmc'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">8</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.1'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.2'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.3'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'00193'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">'Data'</span><span class="p">,</span> <span class="n">skipfooter</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">24</span><span class="p">))]</span> <span class="c1"># select columns numbered (number 23 is the target label)</span>
|
|
||||||
<span class="c1"># replaces the header with the first row</span>
|
|
||||||
<span class="n">new_header</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># grab the first row for the header</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># take the data less the header row</span>
|
|
||||||
<span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">new_header</span> <span class="c1"># set the header row as the df header</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">22</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'NSP'</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.1'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># 1==Normal</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.2'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># 2==Suspect</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.3'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span> <span class="c1"># 3==Pathologic</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'statlog/german'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'haberman'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'ionosphere'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'b'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'iris'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.1'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-setosa'</span><span class="p">)</span> <span class="c1"># 1==Setosa</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.2'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-versicolor'</span><span class="p">)</span> <span class="c1"># 2==Versicolor</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.3'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-virginica'</span><span class="p">)</span> <span class="c1"># 3==Virginica</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'mammographic-masses'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">df</span><span class="p">[</span><span class="n">df</span> <span class="o">==</span> <span class="s1">'?'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
|
|
||||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'page-blocks'</span><span class="p">:</span>
|
|
||||||
<span class="n">data_path_</span> <span class="o">=</span> <span class="n">data_path</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'.Z'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">data_path_</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Warning: file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> does not exist. If this is the first time you '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'attempt to load this dataset, then you have to manually unzip the </span><span class="si">{</span><span class="n">data_path</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'and name the extracted file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> (unfortunately, neither zipfile, nor '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'gzip can handle unix compressed files automatically -- there is a repo in GitHub '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'https://github.com/umeat/unlzw where the problem seems to be solved anyway).'</span><span class="p">)</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path_</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># 5==block "graphic"</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'semeion'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span> <span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">256</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">263</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="c1"># 263 stands for digit 8 (labels are one-hot vectors from col 256-266)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'R'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'spambase'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'spect'</span><span class="p">:</span>
|
|
||||||
<span class="n">dfs</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">file</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
|
|
||||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">file</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
|
||||||
<span class="n">dfs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">))</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">dfs</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">45</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'tic-tac-toe'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'o'</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'b'</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'x'</span><span class="p">,</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'negative'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'blood-transfusion'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'wine'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">14</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.1'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.2'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.3'</span><span class="p">:</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'wine-quality'</span><span class="p">:</span>
|
|
||||||
<span class="n">filename</span> <span class="o">=</span> <span class="n">filename</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'wine-q-red'</span> <span class="k">else</span> <span class="n">filename</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">';'</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="o">></span> <span class="mi">5</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'yeast'</span><span class="p">:</span>
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'NUC'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">data</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_UCIMulticlassDataset"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassDataset">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. </span>
|
|
||||||
|
|
||||||
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
|
|
||||||
<span class="sd"> - It has more than 1000 instances</span>
|
|
||||||
<span class="sd"> - It is suited for classification</span>
|
|
||||||
<span class="sd"> - It has more than two classes</span>
|
|
||||||
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> import quapy as qp</span>
|
|
||||||
<span class="sd"> >>> dataset = qp.datasets.fetch_UCIMulticlassDataset("dry-bean")</span>
|
|
||||||
<span class="sd"> >>> train, test = dataset.train_test</span>
|
|
||||||
<span class="sd"> >>> ...</span>
|
|
||||||
|
|
||||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
|
|
||||||
|
|
||||||
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest forms the training set</span>
|
|
||||||
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_UCIMulticlassLabelledCollection"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassLabelledCollection">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">LabelledCollection</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
|
|
||||||
<span class="sd"> - It has more than 1000 instances</span>
|
|
||||||
<span class="sd"> - It is suited for classification</span>
|
|
||||||
<span class="sd"> - It has more than two classes</span>
|
|
||||||
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> >>> import quapy as qp</span>
|
|
||||||
<span class="sd"> >>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection("dry-bean")</span>
|
|
||||||
<span class="sd"> >>> X, y = collection.Xy</span>
|
|
||||||
<span class="sd"> >>> ...</span>
|
|
||||||
|
|
||||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
|
|
||||||
|
|
||||||
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
|
|
||||||
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
|
|
||||||
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'UCI Machine Learning datasets repository (multiclass). '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">UCI_MULTICLASS_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">identifiers</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s2">"dry-bean"</span><span class="p">:</span> <span class="mi">602</span><span class="p">,</span>
|
|
||||||
<span class="s2">"wine-quality"</span><span class="p">:</span> <span class="mi">186</span><span class="p">,</span>
|
|
||||||
<span class="s2">"academic-success"</span><span class="p">:</span> <span class="mi">697</span><span class="p">,</span>
|
|
||||||
<span class="s2">"digits"</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span>
|
|
||||||
<span class="s2">"letter"</span><span class="p">:</span> <span class="mi">59</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="n">full_names</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s2">"dry-bean"</span><span class="p">:</span> <span class="s2">"Dry Bean Dataset"</span><span class="p">,</span>
|
|
||||||
<span class="s2">"wine-quality"</span><span class="p">:</span> <span class="s2">"Wine Quality"</span><span class="p">,</span>
|
|
||||||
<span class="s2">"academic-success"</span><span class="p">:</span> <span class="s2">"Predict students' dropout and academic success"</span><span class="p">,</span>
|
|
||||||
<span class="s2">"digits"</span><span class="p">:</span> <span class="s2">"Optical Recognition of Handwritten Digits"</span><span class="p">,</span>
|
|
||||||
<span class="s2">"letter"</span><span class="p">:</span> <span class="s2">"Letter Recognition"</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifiers</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
|
||||||
<span class="n">fullname</span> <span class="o">=</span> <span class="n">full_names</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Loading UCI Muticlass </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">file</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'uci_multiclass'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="o">+</span><span class="s1">'.pkl'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">download</span><span class="p">(</span><span class="nb">id</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_ucirepo</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="nb">id</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'features'</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(),</span> <span class="n">data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'targets'</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
|
|
||||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">searchsorted</span><span class="p">(</span><span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">download</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">data</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">repl</span><span class="o">=</span><span class="p">{</span><span class="s1">'yes'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'no'</span><span class="p">:</span><span class="mi">0</span><span class="p">},</span> <span class="n">astype</span><span class="o">=</span><span class="nb">float</span><span class="p">):</span>
|
|
||||||
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span><span class="n">repl</span><span class="p">[</span><span class="n">x</span><span class="p">])</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">astype</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_lequa2022"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_lequa2022">[docs]</a><span class="k">def</span> <span class="nf">fetch_lequa2022</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.</span>
|
|
||||||
<span class="sd"> In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification</span>
|
|
||||||
<span class="sd"> problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.</span>
|
|
||||||
<span class="sd"> Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification</span>
|
|
||||||
<span class="sd"> problems consisting of estimating the class prevalence values of 28 different merchandise products.</span>
|
|
||||||
<span class="sd"> We refer to the `Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022).</span>
|
|
||||||
<span class="sd"> A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.</span>
|
|
||||||
<span class="sd"> <https://ceur-ws.org/Vol-3180/paper-146.pdf>`_ for a detailed description</span>
|
|
||||||
<span class="sd"> on the tasks and datasets.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
|
|
||||||
|
|
||||||
<span class="sd"> See `lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these</span>
|
|
||||||
<span class="sd"> datasets.</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="sd"> :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of</span>
|
|
||||||
<span class="sd"> :class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,</span>
|
|
||||||
<span class="sd"> that return a series of samples stored in a directory which are labelled by prevalence.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data._lequa2022</span> <span class="kn">import</span> <span class="n">load_raw_documents</span><span class="p">,</span> <span class="n">load_vector_documents</span><span class="p">,</span> <span class="n">SamplesFromDir</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Unknown task </span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">. Valid ones are </span><span class="si">{</span><span class="n">LEQUA2022_TASKS</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">URL_TRAINDEV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.train_dev.zip'</span>
|
|
||||||
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test.zip'</span>
|
|
||||||
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test_prevalences.zip'</span>
|
|
||||||
|
|
||||||
<span class="n">lequa_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'lequa2022'</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
|
||||||
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span> <span class="o">+</span> <span class="s1">'_tmp.zip'</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
|
||||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">)):</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TRAINDEV</span><span class="p">)</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'T1A'</span><span class="p">,</span> <span class="s1">'T1B'</span><span class="p">]:</span>
|
|
||||||
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_vector_documents</span>
|
|
||||||
<span class="k">elif</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'T2A'</span><span class="p">,</span> <span class="s1">'T2B'</span><span class="p">]:</span>
|
|
||||||
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_raw_documents</span>
|
|
||||||
|
|
||||||
<span class="n">tr_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'training_data.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">tr_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">val_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'dev_samples'</span><span class="p">)</span>
|
|
||||||
<span class="n">val_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'dev_prevalences.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">val_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">val_samples_path</span><span class="p">,</span> <span class="n">val_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'test_samples'</span><span class="p">)</span>
|
|
||||||
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'test_prevalences.txt'</span><span class="p">)</span>
|
|
||||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">val_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="fetch_IFCB"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_IFCB">[docs]</a><span class="k">def</span> <span class="nf">fetch_IFCB</span><span class="p">(</span><span class="n">single_sample_train</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more</span>
|
|
||||||
<span class="sd"> information on this dataset, please follow the zenodo link).</span>
|
|
||||||
<span class="sd"> This dataset is based on the data available publicly at</span>
|
|
||||||
<span class="sd"> `WHOI-Plankton repo <https://github.com/hsosik/WHOI-Plankton>`_.</span>
|
|
||||||
<span class="sd"> The scripts for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.</span>
|
|
||||||
<span class="sd"> Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param single_sample_train: a boolean. If true, it will return the train dataset as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (all examples together).</span>
|
|
||||||
<span class="sd"> If false, a generator of training samples will be returned. Each example in the training set has an individual label.</span>
|
|
||||||
<span class="sd"> :param for_model_selection: if True, then 30% of the training set (86 out of 286 samples) is held out and returned as the test set, to be used for model selection; </span>
|
|
||||||
<span class="sd"> if False, then returns the full training set as training set and the test set as the test set</span>
|
|
||||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
|
||||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
|
||||||
<span class="sd"> :return: a tuple `(train, test_gen)` where `train` is an instance of</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, if `single_sample_train` is true or</span>
|
|
||||||
<span class="sd"> :class:`quapy.data._ifcb.IFCBTrainSamplesFromDir`, i.e. a sampling protocol that returns a series of samples</span>
|
|
||||||
<span class="sd"> labelled example by example. test_gen will be a :class:`quapy.data._ifcb.IFCBTestSamples`, </span>
|
|
||||||
<span class="sd"> i.e., a sampling protocol that returns a series of samples labelled by prevalence.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data._ifcb</span> <span class="kn">import</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">,</span> <span class="n">IFCBTestSamples</span><span class="p">,</span> <span class="n">get_sample_list</span><span class="p">,</span> <span class="n">generate_modelselection_split</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">URL_TRAIN</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.train.zip'</span>
|
|
||||||
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.test.zip'</span>
|
|
||||||
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.test_prevalences.zip'</span>
|
|
||||||
|
|
||||||
<span class="n">ifcb_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'ifcb'</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
|
||||||
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">'ifcb_tmp.zip'</span><span class="p">)</span>
|
|
||||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
|
||||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'train'</span><span class="p">)):</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TRAIN</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test'</span><span class="p">)):</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test_prevalences.csv'</span><span class="p">)):</span>
|
|
||||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># Load test prevalences and classes</span>
|
|
||||||
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">'test_prevalences.csv'</span><span class="p">)</span>
|
|
||||||
<span class="n">test_true_prev</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">test_true_prev_path</span><span class="p">)</span>
|
|
||||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">test_true_prev</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
|
|
||||||
|
|
||||||
<span class="c1">#Load train and test samples</span>
|
|
||||||
<span class="n">train_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'train'</span><span class="p">)</span>
|
|
||||||
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
|
|
||||||
<span class="c1"># In this case, return 70% of training data as the training set and 30% as the test set</span>
|
|
||||||
<span class="n">samples</span> <span class="o">=</span> <span class="n">get_sample_list</span><span class="p">(</span><span class="n">train_samples_path</span><span class="p">)</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">generate_modelselection_split</span><span class="p">(</span><span class="n">samples</span><span class="p">,</span> <span class="n">split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
|
|
||||||
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># Test prevalence is computed from class labels</span>
|
|
||||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">test</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="c1"># In this case, we use all training samples as the training set and the test samples as the test set</span>
|
|
||||||
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
|
|
||||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="n">test_true_prev</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># In the case the user wants it, join all the train samples in one LabelledCollection</span>
|
|
||||||
<span class="k">if</span> <span class="n">single_sample_train</span><span class="p">:</span>
|
|
||||||
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">train_gen</span><span class="p">()])</span>
|
|
||||||
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">test_gen</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">train_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,373 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data.preprocessing — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data.preprocessing</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data.preprocessing</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">spmatrix</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <span class="n">TfidfVectorizer</span><span class="p">,</span> <span class="n">CountVectorizer</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="kn">import</span> <span class="n">StandardScaler</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">map_parallel</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="text2tfidf">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.text2tfidf">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span><span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of</span>
|
|
||||||
<span class="sd"> tfidf weighted sparse vectors</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are</span>
|
|
||||||
<span class="sd"> lists of str</span>
|
|
||||||
<span class="sd"> :param min_df: minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</span>
|
|
||||||
<span class="sd"> :param sublinear_tf: whether or not to apply the log scaling to the tf counters (default True)</span>
|
|
||||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
|
||||||
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn's</span>
|
|
||||||
<span class="sd"> `TfidfVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html>`_)</span>
|
|
||||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` in `csr_matrix` format (if inplace=False) or a reference to the</span>
|
|
||||||
<span class="sd"> current Dataset (if inplace=True) where the instances are stored in a `csr_matrix` of real-valued tfidf scores</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">vectorizer</span> <span class="o">=</span> <span class="n">TfidfVectorizer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="n">sublinear_tf</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
|
||||||
<span class="n">training_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">test_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span>
|
|
||||||
<span class="k">return</span> <span class="n">dataset</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="reduce_columns">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.reduce_columns">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">reduce_columns</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Reduces the dimensionality of the instances, represented as a `csr_matrix` (or any subtype of</span>
|
|
||||||
<span class="sd"> `scipy.sparse.spmatrix`), of training and test documents by removing the columns of words which are not present</span>
|
|
||||||
<span class="sd"> in at least `min_df` instances in the training set</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` in which instances are represented in sparse format (any</span>
|
|
||||||
<span class="sd"> subtype of scipy.sparse.spmatrix)</span>
|
|
||||||
<span class="sd"> :param min_df: integer, minimum number of instances below which the columns are removed</span>
|
|
||||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
|
||||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) where the dimensions corresponding to infrequent terms</span>
|
|
||||||
<span class="sd"> in the training set have been removed</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s1">'unaligned vector spaces'</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">filter_by_occurrences</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W</span><span class="p">):</span>
|
|
||||||
<span class="n">column_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">((</span><span class="n">X</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="n">take_columns</span> <span class="o">=</span> <span class="n">column_prevalence</span> <span class="o">>=</span> <span class="n">min_df</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
|
|
||||||
<span class="n">W</span> <span class="o">=</span> <span class="n">W</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">W</span>
|
|
||||||
|
|
||||||
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xte</span> <span class="o">=</span> <span class="n">filter_by_occurrences</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xtr</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xte</span>
|
|
||||||
<span class="k">return</span> <span class="n">dataset</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xte</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="standardize">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.standardize">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">standardize</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Standardizes the real-valued columns of a :class:`quapy.data.base.Dataset`.</span>
|
|
||||||
<span class="sd"> Standardization, aka z-scoring, of a variable `X` comes down to subtracting the average and normalizing by the</span>
|
|
||||||
<span class="sd"> standard deviation.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object</span>
|
|
||||||
<span class="sd"> :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.Dataset` is to be returned</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`quapy.data.base.Dataset`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">s</span> <span class="o">=</span> <span class="n">StandardScaler</span><span class="p">(</span><span class="n">copy</span><span class="o">=</span><span class="ow">not</span> <span class="n">inplace</span><span class="p">)</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">dataset</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="index">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.index">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Indexes the tokens of a textual :class:`quapy.data.base.Dataset` of string documents.</span>
|
|
||||||
<span class="sd"> To index a document means to replace each different token by a unique numerical index.</span>
|
|
||||||
<span class="sd"> Rare words (i.e., words occurring less than `min_df` times) are replaced by a special token `UNK`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object where the instances of training and test documents</span>
|
|
||||||
<span class="sd"> are lists of str</span>
|
|
||||||
<span class="sd"> :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index</span>
|
|
||||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
|
||||||
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn's</span>
|
|
||||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>_`)</span>
|
|
||||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer values representing indices.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
|
||||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">indexer</span> <span class="o">=</span> <span class="n">IndexTransformer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
|
||||||
<span class="n">training_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">test_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">training_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
|
||||||
<span class="n">test_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span>
|
|
||||||
<span class="k">return</span> <span class="n">dataset</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__check_type</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">element_type</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">container_type</span><span class="p">:</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'unexpected type of container (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
|
||||||
<span class="k">if</span> <span class="n">element_type</span><span class="p">:</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">element_type</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'unexpected type of element (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">IndexTransformer</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This class implements a sklearn's-style transformer that indexes text as numerical ids for the tokens it</span>
|
|
||||||
<span class="sd"> contains, and that would be generated by sklearn's</span>
|
|
||||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param kwargs: keyworded arguments from</span>
|
|
||||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span> <span class="o">=</span> <span class="n">CountVectorizer</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># a valid index is assigned after fit</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span> <span class="c1"># a valid index is assigned after fit</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer.fit">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: a list of strings</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">build_analyzer</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">vocabulary_</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'UNK_TOKEN'</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'UNK_INDEX'</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_TOKEN'</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_INDEX'</span><span class="p">])</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer.transform">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.transform">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Transforms the strings in `X` as lists of numerical ids</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: a list of strings</span>
|
|
||||||
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
|
|
||||||
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="c1"># given the number of tasks and the number of jobs, generates the slices for the parallel processes</span>
|
|
||||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">!=</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'transform called before fit'</span>
|
|
||||||
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_index</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">documents</span><span class="p">):</span>
|
|
||||||
<span class="n">vocab</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="p">[[</span><span class="n">vocab</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span><span class="p">)</span> <span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span><span class="p">(</span><span class="n">doc</span><span class="p">)]</span> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="s1">'indexing'</span><span class="p">)]</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer.fit_transform">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit_transform">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">fit_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Fits the transform on `X` and transforms it.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: a list of strings</span>
|
|
||||||
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
|
|
||||||
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer.vocabulary_size">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.vocabulary_size">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Gets the length of the vocabulary according to which the document tokens have been indexed</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: integer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IndexTransformer.add_word">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.add_word">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">add_word</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">,</span> <span class="nb">id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nogaps</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.</span>
|
|
||||||
<span class="sd"> Useful to define special tokens for codifying unknown words, or padding tokens.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param word: string, surface form of the token</span>
|
|
||||||
<span class="sd"> :param id: integer, numerical value to assign to the token (leave as None for indicating the next valid id,</span>
|
|
||||||
<span class="sd"> default)</span>
|
|
||||||
<span class="sd"> :param nogaps: if set to True (default) asserts that the id indicated leads to no numerical gaps with</span>
|
|
||||||
<span class="sd"> precedent ids stored so far</span>
|
|
||||||
<span class="sd"> :return: integer, the numerical id for the new token</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> already in dictionary'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">id</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="c1"># add the word with the next id</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">id2word</span> <span class="o">=</span> <span class="p">{</span><span class="n">id_</span><span class="p">:</span><span class="n">word_</span> <span class="k">for</span> <span class="n">word_</span><span class="p">,</span> <span class="n">id_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
|
||||||
<span class="k">if</span> <span class="nb">id</span> <span class="ow">in</span> <span class="n">id2word</span><span class="p">:</span>
|
|
||||||
<span class="n">old_word</span> <span class="o">=</span> <span class="n">id2word</span><span class="p">[</span><span class="nb">id</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">id</span>
|
|
||||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">old_word</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">old_word</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">nogaps</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="nb">id</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> added with id </span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">, while the current vocabulary size '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'is of </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="si">}</span><span class="s1">, and id gaps are not allowed'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,244 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.data.reader — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.data.reader</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.data.reader</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">dok_matrix</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="from_text">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_text">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">from_text</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">class2int</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Reads a labelled collection of documents.</span>
|
|
||||||
<span class="sd"> File format <0 or 1>\t<document>\n</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: path to the labelled collection</span>
|
|
||||||
<span class="sd"> :param encoding: the text encoding used to open the file</span>
|
|
||||||
<span class="sd"> :param verbose: if >0 (default) shows some progress information in standard output</span>
|
|
||||||
<span class="sd"> :return: a list of sentences, and a list of labels</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="o">></span><span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">file</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">'loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">file</span><span class="p">:</span>
|
|
||||||
<span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
|
||||||
<span class="k">if</span> <span class="n">line</span><span class="p">:</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="n">label</span><span class="p">,</span> <span class="n">sentence</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="n">sentence</span> <span class="o">=</span> <span class="n">sentence</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
|
||||||
<span class="k">if</span> <span class="n">class2int</span><span class="p">:</span>
|
|
||||||
<span class="n">label</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">sentence</span><span class="p">:</span>
|
|
||||||
<span class="n">all_sentences</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sentence</span><span class="p">)</span>
|
|
||||||
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
|
|
||||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'format error in </span><span class="si">{</span><span class="n">line</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="from_sparse">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_sparse">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">from_sparse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Reads a labelled collection of real-valued instances expressed in sparse format</span>
|
|
||||||
<span class="sd"> File format <-1 or 0 or 1>[\s col(int):val(float)]\n</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: path to the labelled collection</span>
|
|
||||||
<span class="sd"> :return: a `csr_matrix` containing the instances (rows), and a ndarray containing the labels</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">):</span>
|
|
||||||
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">col_val</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span>
|
|
||||||
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">col</span><span class="p">,</span> <span class="n">val</span>
|
|
||||||
|
|
||||||
<span class="n">all_documents</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="n">max_col</span> <span class="o">=</span> <span class="mi">0</span>
|
|
||||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">'loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
|
||||||
<span class="n">parts</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
|
||||||
<span class="k">if</span> <span class="n">parts</span><span class="p">:</span>
|
|
||||||
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
|
|
||||||
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_val</span> <span class="ow">in</span> <span class="n">parts</span><span class="p">[</span><span class="mi">1</span><span class="p">:]])</span>
|
|
||||||
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">cols</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">vals</span><span class="p">)</span>
|
|
||||||
<span class="n">max_col</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">max_col</span><span class="p">,</span> <span class="n">cols</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
|
|
||||||
<span class="n">all_documents</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">))</span>
|
|
||||||
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">dok_matrix</span><span class="p">((</span><span class="n">n_docs</span><span class="p">,</span> <span class="n">max_col</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">)</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">enumerate</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span> <span class="n">total</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span>
|
|
||||||
<span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">'\-- filling matrix of shape </span><span class="si">{</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
|
||||||
<span class="n">X</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">cols</span><span class="p">]</span> <span class="o">=</span> <span class="n">vals</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">tocsr</span><span class="p">()</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
|
|
||||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="from_csv">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_csv">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">from_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Reads a csv file in which columns are separated by ','.</span>
|
|
||||||
<span class="sd"> File format <label>,<feat1>,<feat2>,...,<featn>\n</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: path to the csv file</span>
|
|
||||||
<span class="sd"> :param encoding: the text encoding used to open the file</span>
|
|
||||||
<span class="sd"> :return: a ndarray (float) for the covariates and a np.ndarray for the labels</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">instance</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">'reading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
|
||||||
<span class="n">yi</span><span class="p">,</span> <span class="o">*</span><span class="n">xi</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span><span class="n">xi</span><span class="p">)))</span>
|
|
||||||
<span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="reindex_labels">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.reindex_labels">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">reindex_labels</span><span class="p">(</span><span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.</span>
|
|
||||||
<span class="sd"> E.g.:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> reindex_labels(['B', 'B', 'A', 'C'])</span>
|
|
||||||
<span class="sd"> >>> (array([1, 1, 0, 2]), array(['A', 'B', 'C'], dtype='<U1'))</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param y: the list or array of original labels</span>
|
|
||||||
<span class="sd"> :return: a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="n">classnames</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span>
|
|
||||||
<span class="n">label2index</span> <span class="o">=</span> <span class="p">{</span><span class="n">label</span><span class="p">:</span> <span class="n">index</span> <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classnames</span><span class="p">)}</span>
|
|
||||||
<span class="n">indexed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">classnames</span><span class="p">:</span>
|
|
||||||
<span class="n">indexed</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">label</span><span class="p">]</span> <span class="o">=</span> <span class="n">label2index</span><span class="p">[</span><span class="n">label</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="n">indexed</span><span class="p">,</span> <span class="n">classnames</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="binarize">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.binarize">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Binarizes a categorical array-like collection of labels towards the positive class `pos_class`. E.g.,:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> binarize([1, 2, 3, 1, 1, 0], pos_class=2)</span>
|
|
||||||
<span class="sd"> >>> array([0, 1, 0, 0, 0, 0])</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param y: array-like of labels</span>
|
|
||||||
<span class="sd"> :param pos_class: integer, the positive class</span>
|
|
||||||
<span class="sd"> :return: a binary np.ndarray, in which value 1 corresponds to positions in which `y` had `pos_class` labels, and</span>
|
|
||||||
<span class="sd"> 0 otherwise</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="n">ybin</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
|
||||||
<span class="n">ybin</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">pos_class</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">return</span> <span class="n">ybin</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,433 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.error — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.error</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.error</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="sd">"""Implementation of error measures used for quantification"""</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="from_name"><a class="viewcode-back" href="../../quapy.html#quapy.error.from_name">[docs]</a><span class="k">def</span> <span class="nf">from_name</span><span class="p">(</span><span class="n">err_name</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Gets an error function from its name. E.g., `from_name("mae")`</span>
|
|
||||||
<span class="sd"> will return function :meth:`quapy.error.mae`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param err_name: string, the error name</span>
|
|
||||||
<span class="sd"> :return: a callable implementing the requested error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">err_name</span> <span class="ow">in</span> <span class="n">ERROR_NAMES</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'unknown error </span><span class="si">{</span><span class="n">err_name</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="n">callable_error</span> <span class="o">=</span> <span class="nb">globals</span><span class="p">()[</span><span class="n">err_name</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="n">callable_error</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="f1e"><a class="viewcode-back" href="../../quapy.html#quapy.error.f1e">[docs]</a><span class="k">def</span> <span class="nf">f1e</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e.,</span>
|
|
||||||
<span class="sd"> :math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall,</span>
|
|
||||||
<span class="sd"> defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing</span>
|
|
||||||
<span class="sd"> for true positives, false positives, and false negatives, respectively.</span>
|
|
||||||
<span class="sd"> `Macro` averaging means the :math:`F_1` is computed for each category independently,</span>
|
|
||||||
<span class="sd"> and then averaged.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param y_true: array-like of true labels</span>
|
|
||||||
<span class="sd"> :param y_pred: array-like of predicted labels</span>
|
|
||||||
<span class="sd"> :return: :math:`1-F_1^M`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="acce"><a class="viewcode-back" href="../../quapy.html#quapy.error.acce">[docs]</a><span class="k">def</span> <span class="nf">acce</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the error in terms of 1-accuracy. The accuracy is computed as</span>
|
|
||||||
<span class="sd"> :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing</span>
|
|
||||||
<span class="sd"> for true positives, false positives, false negatives, and true negatives,</span>
|
|
||||||
<span class="sd"> respectively</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param y_true: array-like of true labels</span>
|
|
||||||
<span class="sd"> :param y_pred: array-like of predicted labels</span>
|
|
||||||
<span class="sd"> :return: 1-accuracy</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="p">(</span><span class="n">y_true</span> <span class="o">==</span> <span class="n">y_pred</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mae">[docs]</a><span class="k">def</span> <span class="nf">mae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :return: mean absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ae"><a class="viewcode-back" href="../../quapy.html#quapy.error.ae">[docs]</a><span class="k">def</span> <span class="nf">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the absolute error between the two prevalence vectors.</span>
|
|
||||||
<span class="sd"> Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
|
||||||
<span class="sd"> :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,</span>
|
|
||||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :return: absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="nae"><a class="viewcode-back" href="../../quapy.html#quapy.error.nae">[docs]</a><span class="k">def</span> <span class="nf">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the normalized absolute error between the two prevalence vectors.</span>
|
|
||||||
<span class="sd"> Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
|
||||||
<span class="sd"> :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,</span>
|
|
||||||
<span class="sd"> where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`</span>
|
|
||||||
<span class="sd"> are the classes of interest.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :return: normalized absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mnae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnae">[docs]</a><span class="k">def</span> <span class="nf">mnae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :return: mean normalized absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mse"><a class="viewcode-back" href="../../quapy.html#quapy.error.mse">[docs]</a><span class="k">def</span> <span class="nf">mse</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the</span>
|
|
||||||
<span class="sd"> true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the</span>
|
|
||||||
<span class="sd"> predicted prevalence values</span>
|
|
||||||
<span class="sd"> :return: mean squared error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="se"><a class="viewcode-back" href="../../quapy.html#quapy.error.se">[docs]</a><span class="k">def</span> <span class="nf">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the squared error between the two prevalence vectors.</span>
|
|
||||||
<span class="sd"> Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
|
||||||
<span class="sd"> :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,</span>
|
|
||||||
<span class="sd"> where</span>
|
|
||||||
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :return: squared error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="p">((</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.mkld">[docs]</a><span class="k">def</span> <span class="nf">mkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the</span>
|
|
||||||
<span class="sd"> sample pairs. The distributions are smoothed using the `eps` factor</span>
|
|
||||||
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
|
|
||||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
|
||||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
|
||||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: mean Kullback-Leibler divergence</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="kld"><a class="viewcode-back" href="../../quapy.html#quapy.error.kld">[docs]</a><span class="k">def</span> <span class="nf">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the Kullback-Leibler divergence between the two prevalence distributions.</span>
|
|
||||||
<span class="sd"> Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`</span>
|
|
||||||
<span class="sd"> is computed as</span>
|
|
||||||
<span class="sd"> :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=</span>
|
|
||||||
<span class="sd"> \\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,</span>
|
|
||||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
|
|
||||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
|
||||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
|
||||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: Kullback-Leibler divergence between the two distributions</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">smooth_prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span>
|
|
||||||
<span class="n">smooth_prevs_hat</span> <span class="o">=</span> <span class="n">prevs_hat</span> <span class="o">+</span> <span class="n">eps</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">smooth_prevs</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">smooth_prevs</span><span class="o">/</span><span class="n">smooth_prevs_hat</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mnkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnkld">[docs]</a><span class="k">def</span> <span class="nf">mnkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)</span>
|
|
||||||
<span class="sd"> across the sample pairs. The distributions are smoothed using the `eps` factor</span>
|
|
||||||
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain</span>
|
|
||||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
|
||||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
|
||||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: mean Normalized Kullback-Leibler divergence</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="nkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.nkld">[docs]</a><span class="k">def</span> <span class="nf">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</span>
|
|
||||||
<span class="sd"> Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and</span>
|
|
||||||
<span class="sd"> :math:`\\hat{p}` is computed as</span>
|
|
||||||
<span class="sd"> :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,</span>
|
|
||||||
<span class="sd"> where</span>
|
|
||||||
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions</span>
|
|
||||||
<span class="sd"> contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample</span>
|
|
||||||
<span class="sd"> size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
|
||||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: Normalized Kullback-Leibler divergence between the two distributions</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">ekld</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">))</span>
|
|
||||||
<span class="k">return</span> <span class="mf">2.</span> <span class="o">*</span> <span class="n">ekld</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">ekld</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mrae">[docs]</a><span class="k">def</span> <span class="nf">mrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across</span>
|
|
||||||
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
|
|
||||||
<span class="sd"> :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. `mrae` is not defined in cases in which the true</span>
|
|
||||||
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
|
|
||||||
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
|
|
||||||
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: mean relative absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="rae"><a class="viewcode-back" href="../../quapy.html#quapy.error.rae">[docs]</a><span class="k">def</span> <span class="nf">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the relative absolute error between the two prevalence vectors.</span>
|
|
||||||
<span class="sd"> Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
|
|
||||||
<span class="sd"> is computed as</span>
|
|
||||||
<span class="sd"> :math:`RAE(p,\\hat{p})=</span>
|
|
||||||
<span class="sd"> \\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,</span>
|
|
||||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution</span>
|
|
||||||
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
|
|
||||||
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
|
||||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: relative absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="nrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.nrae">[docs]</a><span class="k">def</span> <span class="nf">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the normalized absolute relative error between the two prevalence vectors.</span>
|
|
||||||
<span class="sd"> Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
|
|
||||||
<span class="sd"> is computed as</span>
|
|
||||||
<span class="sd"> :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,</span>
|
|
||||||
<span class="sd"> where</span>
|
|
||||||
<span class="sd"> :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`</span>
|
|
||||||
<span class="sd"> and :math:`\\mathcal{Y}` are the classes of interest.</span>
|
|
||||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution</span>
|
|
||||||
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
|
|
||||||
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
|
||||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: normalized relative absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
|
||||||
<span class="n">min_p</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="o">+</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">min_p</span><span class="p">)</span><span class="o">/</span><span class="n">min_p</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mnrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnrae">[docs]</a><span class="k">def</span> <span class="nf">mnrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across</span>
|
|
||||||
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
|
|
||||||
<span class="sd"> :meth:`quapy.error.smooth`).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
|
||||||
<span class="sd"> prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor. `mnrae` is not defined in cases in which the true</span>
|
|
||||||
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
|
|
||||||
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
|
|
||||||
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
|
||||||
<span class="sd"> :return: mean normalized relative absolute error</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="smooth"><a class="viewcode-back" href="../../quapy.html#quapy.error.smooth">[docs]</a><span class="k">def</span> <span class="nf">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:</span>
|
|
||||||
<span class="sd"> :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+</span>
|
|
||||||
<span class="sd"> \\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
|
||||||
<span class="sd"> :param eps: smoothing factor</span>
|
|
||||||
<span class="sd"> :return: array-like of shape `(n_classes,)` with the smoothed distribution</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">eps</span> <span class="o">*</span> <span class="n">n_classes</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">eps</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span>
|
|
||||||
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set'</span><span class="p">)</span>
|
|
||||||
<span class="n">eps</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">/</span> <span class="p">(</span><span class="mf">2.</span> <span class="o">*</span> <span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">eps</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="n">CLASSIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">f1e</span><span class="p">,</span> <span class="n">acce</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">mae</span><span class="p">,</span> <span class="n">mnae</span><span class="p">,</span> <span class="n">mrae</span><span class="p">,</span> <span class="n">mnrae</span><span class="p">,</span> <span class="n">mse</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_SINGLE</span> <span class="o">=</span> <span class="p">{</span><span class="n">ae</span><span class="p">,</span> <span class="n">nae</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">se</span><span class="p">,</span> <span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_SMOOTH</span> <span class="o">=</span> <span class="p">{</span><span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">,</span> <span class="n">mrae</span><span class="p">}</span>
|
|
||||||
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">CLASSIFICATION_ERROR</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">}</span>
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_SMOOTH_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SMOOTH</span><span class="p">}</span>
|
|
||||||
<span class="n">ERROR_NAMES</span> <span class="o">=</span> \
|
|
||||||
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
|
|
||||||
|
|
||||||
<span class="n">f1_error</span> <span class="o">=</span> <span class="n">f1e</span>
|
|
||||||
<span class="n">acc_error</span> <span class="o">=</span> <span class="n">acce</span>
|
|
||||||
<span class="n">mean_absolute_error</span> <span class="o">=</span> <span class="n">mae</span>
|
|
||||||
<span class="n">absolute_error</span> <span class="o">=</span> <span class="n">ae</span>
|
|
||||||
<span class="n">mean_relative_absolute_error</span> <span class="o">=</span> <span class="n">mrae</span>
|
|
||||||
<span class="n">relative_absolute_error</span> <span class="o">=</span> <span class="n">rae</span>
|
|
||||||
<span class="n">normalized_absolute_error</span> <span class="o">=</span> <span class="n">nae</span>
|
|
||||||
<span class="n">normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">nrae</span>
|
|
||||||
<span class="n">mean_normalized_absolute_error</span> <span class="o">=</span> <span class="n">mnae</span>
|
|
||||||
<span class="n">mean_normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">mnrae</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,291 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.evaluation</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.evaluation</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Iterable</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">,</span> <span class="n">IterateProtocol</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="prediction"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.prediction">[docs]</a><span class="k">def</span> <span class="nf">prediction</span><span class="p">(</span>
|
|
||||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
|
||||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Uses a quantification model to generate predictions for the samples generated via a specific protocol.</span>
|
|
||||||
<span class="sd"> This function is central to all evaluation processes, and is endowed with an optimization to speed-up the</span>
|
|
||||||
<span class="sd"> prediction of protocols that generate samples from a large collection. The optimization applies to aggregative</span>
|
|
||||||
<span class="sd"> quantifiers only, and to OnLabelledCollectionProtocol protocols, and comes down to generating the classification</span>
|
|
||||||
<span class="sd"> predictions once and for all, and then generating samples over the classification predictions (instead of over</span>
|
|
||||||
<span class="sd"> the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by</span>
|
|
||||||
<span class="sd"> setting `aggr_speedup` to 'auto' or True, and is only carried out if the overall process is convenient in terms</span>
|
|
||||||
<span class="sd"> of computations (e.g., if the number of classification predictions needed for the original collection exceed the</span>
|
|
||||||
<span class="sd"> number of classification predictions needed for all samples, then the optimization is not undertaken).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
|
||||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
|
||||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
|
|
||||||
<span class="sd"> in charge of generating the samples for which the model has to issue class prevalence predictions.</span>
|
|
||||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
|
||||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
|
||||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
|
||||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
|
||||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
|
||||||
<span class="sd"> :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape</span>
|
|
||||||
<span class="sd"> `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'auto'</span><span class="p">,</span> <span class="s1">'force'</span><span class="p">],</span> <span class="s1">'invalid value for aggr_speedup'</span>
|
|
||||||
|
|
||||||
<span class="n">sout</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">False</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="s1">'auto'</span><span class="p">,</span> <span class="s1">'force'</span><span class="p">]:</span>
|
|
||||||
<span class="c1"># checks whether the prediction can be made more efficiently; this check consists in verifying if the model is</span>
|
|
||||||
<span class="c1"># of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to</span>
|
|
||||||
<span class="c1"># classify using the protocol would exceed the number of test documents in the original collection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeQuantifier</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="o">==</span> <span class="s1">'force'</span><span class="p">:</span>
|
|
||||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
|
|
||||||
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'forcing aggregative speedup'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="s1">'sample_size'</span><span class="p">):</span>
|
|
||||||
<span class="n">nD</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
|
|
||||||
<span class="n">samplesD</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">()</span> <span class="o">*</span> <span class="n">protocol</span><span class="o">.</span><span class="n">sample_size</span>
|
|
||||||
<span class="k">if</span> <span class="n">nD</span> <span class="o"><</span> <span class="n">samplesD</span><span class="p">:</span>
|
|
||||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
|
|
||||||
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'speeding up the prediction for the aggregative quantifier, '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'total classifications </span><span class="si">{</span><span class="n">nD</span><span class="si">}</span><span class="s1"> instead of </span><span class="si">{</span><span class="n">samplesD</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">apply_optimization</span><span class="p">:</span>
|
|
||||||
<span class="n">pre_classified</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">()</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">protocol_with_predictions</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classified</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">aggregate</span><span class="p">,</span> <span class="n">protocol_with_predictions</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__prediction_helper</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">,</span> <span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">sample_instances</span><span class="p">,</span> <span class="n">sample_prev</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">protocol</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="s1">'predicting'</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="n">protocol</span><span class="p">():</span>
|
|
||||||
<span class="n">estim_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">(</span><span class="n">sample_instances</span><span class="p">))</span>
|
|
||||||
<span class="n">true_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sample_prev</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">)</span>
|
|
||||||
<span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="evaluation_report"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluation_report">[docs]</a><span class="k">def</span> <span class="nf">evaluation_report</span><span class="p">(</span><span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
|
||||||
<span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span><span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">'mae'</span><span class="p">,</span>
|
|
||||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates a report (a pandas' DataFrame) containing information of the evaluation of the model as according</span>
|
|
||||||
<span class="sd"> to a specific protocol and in terms of one or more evaluation metrics (errors).</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
|
||||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
|
||||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
|
|
||||||
<span class="sd"> in charge of generating the samples in which the model is evaluated.</span>
|
|
||||||
<span class="sd"> :param error_metrics: a string, or list of strings, representing the name(s) of an error function in `qp.error`</span>
|
|
||||||
<span class="sd"> (e.g., 'mae', the default value), or a callable function, or a list of callable functions, implementing</span>
|
|
||||||
<span class="sd"> the error function itself.</span>
|
|
||||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
|
||||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
|
||||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
|
||||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
|
||||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
|
||||||
<span class="sd"> :return: a pandas' DataFrame containing the columns 'true-prev' (the true prevalence of each sample),</span>
|
|
||||||
<span class="sd"> 'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error metrics</span>
|
|
||||||
<span class="sd"> have been indicated, each displaying the score in terms of that metric for every sample.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">'mae'</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metrics</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="n">error_metrics</span> <span class="o">=</span> <span class="p">[</span><span class="n">error_metrics</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">error_funcs</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_metrics</span><span class="p">]</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">'__call__'</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">),</span> <span class="s1">'invalid error functions'</span>
|
|
||||||
<span class="n">error_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">e</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">row_entries</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">series</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'true-prev'</span><span class="p">:</span> <span class="n">true_prev</span><span class="p">,</span> <span class="s1">'estim-prev'</span><span class="p">:</span> <span class="n">estim_prev</span><span class="p">}</span>
|
|
||||||
<span class="k">for</span> <span class="n">error_name</span><span class="p">,</span> <span class="n">error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">error_names</span><span class="p">,</span> <span class="n">error_funcs</span><span class="p">):</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">)</span>
|
|
||||||
<span class="n">series</span><span class="p">[</span><span class="n">error_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">score</span>
|
|
||||||
<span class="n">row_entries</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">series</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">row_entries</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">df</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="evaluate"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate">[docs]</a><span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span>
|
|
||||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
|
||||||
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
|
|
||||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Evaluates a quantification model according to a specific sample generation protocol and in terms of one</span>
|
|
||||||
<span class="sd"> evaluation metric (error).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
|
||||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
|
||||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the</span>
|
|
||||||
<span class="sd"> protocol in charge of generating the samples in which the model is evaluated.</span>
|
|
||||||
<span class="sd"> :param error_metric: a string representing the name(s) of an error function in `qp.error`</span>
|
|
||||||
<span class="sd"> (e.g., 'mae'), or a callable function implementing the error function itself.</span>
|
|
||||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
|
||||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
|
||||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
|
||||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
|
||||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
|
||||||
<span class="sd"> :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with</span>
|
|
||||||
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns</span>
|
|
||||||
<span class="sd"> a single float</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metric</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="n">error_metric</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_metric</span><span class="p">)</span>
|
|
||||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="evaluate_on_samples"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate_on_samples">[docs]</a><span class="k">def</span> <span class="nf">evaluate_on_samples</span><span class="p">(</span>
|
|
||||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">samples</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">],</span>
|
|
||||||
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
|
||||||
<span class="sd"> :param samples: a list of samples on which the quantifier is to be evaluated</span>
|
|
||||||
<span class="sd"> :param error_metric: a string representing the name(s) of an error function in `qp.error`</span>
|
|
||||||
<span class="sd"> (e.g., 'mae'), or a callable function implementing the error function itself.</span>
|
|
||||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
|
||||||
<span class="sd"> :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with</span>
|
|
||||||
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns</span>
|
|
||||||
<span class="sd"> a single float</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">IterateProtocol</span><span class="p">(</span><span class="n">samples</span><span class="p">),</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,468 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.functional — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.functional</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.functional</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">scipy</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="prevalence_linspace"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_linspace">[docs]</a><span class="k">def</span> <span class="nf">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.01</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Produces an array of uniformly separated values of prevalence.</span>
|
|
||||||
<span class="sd"> By default, produces an array of 21 prevalence values, with</span>
|
|
||||||
<span class="sd"> step 0.05 and with the limits smoothed, i.e.:</span>
|
|
||||||
<span class="sd"> [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)</span>
|
|
||||||
<span class="sd"> :param repeats: number of times each prevalence is to be repeated (defaults to 1)</span>
|
|
||||||
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
|
|
||||||
<span class="sd"> :return: an array of uniformly separated prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="n">num</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">endpoint</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+=</span> <span class="n">smooth_limits_epsilon</span>
|
|
||||||
<span class="n">p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-=</span> <span class="n">smooth_limits_epsilon</span>
|
|
||||||
<span class="k">if</span> <span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">></span> <span class="n">p</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the smoothing in the limits is greater than the prevalence step'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">repeats</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">repeats</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">p</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="prevalence_from_labels"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_labels">[docs]</a><span class="k">def</span> <span class="nf">prevalence_from_labels</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Computed the prevalence values from a vector of labels.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param labels: array-like of shape `(n_instances)` with the label for each instance</span>
|
|
||||||
<span class="sd"> :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when</span>
|
|
||||||
<span class="sd"> some classes have no examples.</span>
|
|
||||||
<span class="sd"> :return: an ndarray of shape `(len(classes))` with the class prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">labels</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param labels does not seem to be a ndarray of label predictions'</span><span class="p">)</span>
|
|
||||||
<span class="n">unique</span><span class="p">,</span> <span class="n">counts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">return_counts</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">by_class</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">unique</span><span class="p">,</span> <span class="n">counts</span><span class="p">)))</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">by_class</span><span class="p">[</span><span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="n">classes</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="prevalence_from_probabilities"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_probabilities">[docs]</a><span class="k">def</span> <span class="nf">prevalence_from_probabilities</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">binarize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a vector of prevalence values from a matrix of posterior probabilities.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param posteriors: array-like of shape `(n_instances, n_classes,)` with posterior probabilities for each class</span>
|
|
||||||
<span class="sd"> :param binarize: set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,</span>
|
|
||||||
<span class="sd"> converting the vectors of posterior probabilities into class indices, by taking the argmax).</span>
|
|
||||||
<span class="sd"> :return: array of shape `(n_classes,)` containing the prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param posteriors does not seem to be a ndarray of posteior probabilities'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">binarize</span><span class="p">:</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalence_from_labels</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">posteriors</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="as_binary_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.as_binary_prevalence">[docs]</a><span class="k">def</span> <span class="nf">as_binary_prevalence</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">],</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two</span>
|
|
||||||
<span class="sd"> values representing a binary distribution.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param positive_prevalence: prevalence for the positive class</span>
|
|
||||||
<span class="sd"> :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution</span>
|
|
||||||
<span class="sd"> is valid. If False, it then checks that the value is in the valid range, and raises an error if not.</span>
|
|
||||||
<span class="sd"> :return: np.ndarray of shape `(2,)`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">clip_if_necessary</span><span class="p">:</span>
|
|
||||||
<span class="n">positive_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><=</span> <span class="n">positive_prevalence</span> <span class="o"><=</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'the value provided is not a valid prevalence for the positive class'</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="n">positive_prevalence</span><span class="p">])</span><span class="o">.</span><span class="n">T</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="HellingerDistance"><a class="viewcode-back" href="../../quapy.html#quapy.functional.HellingerDistance">[docs]</a><span class="k">def</span> <span class="nf">HellingerDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Computes the Hellinger Distance (HD) between (discretized) distributions `P` and `Q`.</span>
|
|
||||||
<span class="sd"> The HD for two discrete distributions of `k` bins is computed here (omitting the conventional :math:`1/\\sqrt{2}` normalization factor) as:</span>
|
|
||||||
|
|
||||||
<span class="sd"> .. math::</span>
|
|
||||||
<span class="sd"> HD(P,Q) = \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
|
||||||
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
|
||||||
<span class="sd"> :return: float</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">P</span><span class="p">)</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">Q</span><span class="p">))</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TopsoeDistance"><a class="viewcode-back" href="../../quapy.html#quapy.functional.TopsoeDistance">[docs]</a><span class="k">def</span> <span class="nf">TopsoeDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-20</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Topsoe distance between two (discretized) distributions `P` and `Q`.</span>
|
|
||||||
<span class="sd"> The Topsoe distance for two discrete distributions of `k` bins is defined as:</span>
|
|
||||||
|
|
||||||
<span class="sd"> .. math::</span>
|
|
||||||
<span class="sd"> Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) +</span>
|
|
||||||
<span class="sd"> q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
|
||||||
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
|
||||||
<span class="sd"> :return: float</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">P</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">P</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">))</span> <span class="o">+</span> <span class="n">Q</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="uniform_prevalence_sampling"><a class="viewcode-back" href="../../quapy.html#quapy.functional.uniform_prevalence_sampling">[docs]</a><span class="k">def</span> <span class="nf">uniform_prevalence_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements the `Kraemer algorithm <http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf>`_</span>
|
|
||||||
<span class="sd"> for sampling uniformly at random from the unit simplex. This implementation is adapted from this</span>
|
|
||||||
<span class="sd"> `post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>`_.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_classes: integer, number of classes (dimensionality of the simplex)</span>
|
|
||||||
<span class="sd"> :param size: number of samples to return</span>
|
|
||||||
<span class="sd"> :return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">)</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">u</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span><span class="o">.</span><span class="n">T</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">u</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">_0s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
|
|
||||||
<span class="n">_1s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
|
|
||||||
<span class="n">a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">_0s</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span>
|
|
||||||
<span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">u</span><span class="p">,</span> <span class="n">_1s</span><span class="p">])</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">b</span><span class="o">-</span><span class="n">a</span>
|
|
||||||
<span class="k">if</span> <span class="n">size</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">u</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">u</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="n">uniform_simplex_sampling</span> <span class="o">=</span> <span class="n">uniform_prevalence_sampling</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="strprev"><a class="viewcode-back" href="../../quapy.html#quapy.functional.strprev">[docs]</a><span class="k">def</span> <span class="nf">strprev</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a string representation for a prevalence vector. E.g.,</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> strprev([1/3, 2/3], prec=2)</span>
|
|
||||||
<span class="sd"> '[0.33, 0.67]'</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevalences: a vector of prevalence values</span>
|
|
||||||
<span class="sd"> :param prec: float precision</span>
|
|
||||||
<span class="sd"> :return: string</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="s1">'['</span><span class="o">+</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">p</span><span class="si">:</span><span class="s1">.</span><span class="si">{</span><span class="n">prec</span><span class="si">}</span><span class="s1">f</span><span class="si">}</span><span class="s1">'</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">prevalences</span><span class="p">])</span> <span class="o">+</span> <span class="s1">']'</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="adjusted_quantification"><a class="viewcode-back" href="../../quapy.html#quapy.functional.adjusted_quantification">[docs]</a><span class="k">def</span> <span class="nf">adjusted_quantification</span><span class="p">(</span><span class="n">prevalence_estim</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">clip</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the</span>
|
|
||||||
<span class="sd"> positive class `p` comes down to computing:</span>
|
|
||||||
|
|
||||||
<span class="sd"> .. math::</span>
|
|
||||||
<span class="sd"> ACC(p) = \\frac{ p - fpr }{ tpr - fpr }</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevalence_estim: float, the estimated value for the positive class</span>
|
|
||||||
<span class="sd"> :param tpr: float, the true positive rate of the classifier</span>
|
|
||||||
<span class="sd"> :param fpr: float, the false positive rate of the classifier</span>
|
|
||||||
<span class="sd"> :param clip: set to True (default) to clip values that might exceed the range [0,1]</span>
|
|
||||||
<span class="sd"> :return: float, the adjusted count</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">den</span> <span class="o">=</span> <span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span>
|
|
||||||
<span class="k">if</span> <span class="n">den</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">den</span> <span class="o">+=</span> <span class="mf">1e-8</span>
|
|
||||||
<span class="n">adjusted</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevalence_estim</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">/</span> <span class="n">den</span>
|
|
||||||
<span class="k">if</span> <span class="n">clip</span><span class="p">:</span>
|
|
||||||
<span class="n">adjusted</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">adjusted</span><span class="p">,</span> <span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">adjusted</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="normalize_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.normalize_prevalence">[docs]</a><span class="k">def</span> <span class="nf">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Normalize a vector or matrix of prevalence values. The normalization consists of applying a L1 normalization in</span>
|
|
||||||
<span class="sd"> cases in which the prevalence values are not all-zeros, and to convert the prevalence values into `1/n_classes` in</span>
|
|
||||||
<span class="sd"> cases in which all values are zero.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values</span>
|
|
||||||
<span class="sd"> :return: a normalized vector or matrix of prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span>
|
|
||||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="n">accum</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">true_divide</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">accum</span><span class="p">,</span> <span class="n">where</span><span class="o">=</span><span class="n">accum</span><span class="o">></span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">allzeros</span> <span class="o">=</span> <span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span>
|
|
||||||
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">allzeros</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">prevalences</span><span class="p">[</span><span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__num_prevalence_combinations_depr</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Computes the number of prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally distant</span>
|
|
||||||
<span class="sd"> prevalence values are generated and `n_repeats` repetitions are requested.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
|
||||||
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
|
|
||||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
|
||||||
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
|
|
||||||
<span class="sd"> number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">__cache</span><span class="o">=</span><span class="p">{}</span>
|
|
||||||
<span class="k">def</span> <span class="nf">__f</span><span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)</span> <span class="ow">in</span> <span class="n">__cache</span><span class="p">:</span> <span class="c1"># cached result</span>
|
|
||||||
<span class="k">return</span> <span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span>
|
|
||||||
<span class="k">if</span> <span class="n">nc</span><span class="o">==</span><span class="mi">1</span><span class="p">:</span> <span class="c1"># stop condition</span>
|
|
||||||
<span class="k">return</span> <span class="mi">1</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span> <span class="c1"># recursive call</span>
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="n">__f</span><span class="p">(</span><span class="n">nc</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">-</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">np</span><span class="p">)])</span>
|
|
||||||
<span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span> <span class="o">=</span> <span class="n">x</span>
|
|
||||||
<span class="k">return</span> <span class="n">x</span>
|
|
||||||
<span class="k">return</span> <span class="n">__f</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_prevpoints</span><span class="p">)</span> <span class="o">*</span> <span class="n">n_repeats</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="num_prevalence_combinations"><a class="viewcode-back" href="../../quapy.html#quapy.functional.num_prevalence_combinations">[docs]</a><span class="k">def</span> <span class="nf">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally</span>
|
|
||||||
<span class="sd"> distant prevalence values are generated and `n_repeats` repetitions are requested.</span>
|
|
||||||
<span class="sd"> The computation comes down to calculating:</span>
|
|
||||||
|
|
||||||
<span class="sd"> .. math::</span>
|
|
||||||
<span class="sd"> \\binom{N+C-1}{C-1} \\times r</span>
|
|
||||||
|
|
||||||
<span class="sd"> where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of</span>
|
|
||||||
<span class="sd"> classes, and `r` is `n_repeats`. This solution comes from the</span>
|
|
||||||
<span class="sd"> `Stars and Bars <https://brilliant.org/wiki/integer-equations-star-and-bars/>`_ problem.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
|
||||||
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
|
|
||||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
|
||||||
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
|
|
||||||
<span class="sd"> number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">N</span> <span class="o">=</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
|
|
||||||
<span class="n">C</span> <span class="o">=</span> <span class="n">n_classes</span>
|
|
||||||
<span class="n">r</span> <span class="o">=</span> <span class="n">n_repeats</span>
|
|
||||||
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">scipy</span><span class="o">.</span><span class="n">special</span><span class="o">.</span><span class="n">binom</span><span class="p">(</span><span class="n">N</span> <span class="o">+</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">r</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="get_nprevpoints_approximation"><a class="viewcode-back" href="../../quapy.html#quapy.functional.get_nprevpoints_approximation">[docs]</a><span class="k">def</span> <span class="nf">get_nprevpoints_approximation</span><span class="p">(</span><span class="n">combinations_budget</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so</span>
|
|
||||||
<span class="sd"> that the number of valid prevalence values generated as combinations of prevalence points (points in a</span>
|
|
||||||
<span class="sd"> `n_classes`-dimensional simplex) do not exceed combinations_budget.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param combinations_budget: integer, maximum number of combinations allowed</span>
|
|
||||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
|
||||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
|
||||||
<span class="sd"> :return: the largest number of prevalence points that generates at most combinations_budget valid prevalences</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">n_classes</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">n_repeats</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">combinations_budget</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'parameters must be positive integers'</span>
|
|
||||||
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
|
||||||
<span class="n">combinations</span> <span class="o">=</span> <span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">combinations</span> <span class="o">></span> <span class="n">combinations_budget</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">n_prevpoints</span> <span class="o">+=</span> <span class="mi">1</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="check_prevalence_vector"><a class="viewcode-back" href="../../quapy.html#quapy.functional.check_prevalence_vector">[docs]</a><span class="k">def</span> <span class="nf">check_prevalence_vector</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">raise_exception</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">toleranze</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param p: the prevalence vector to check</span>
|
|
||||||
<span class="sd"> :return: True if `p` is valid, False otherwise</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o">>=</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector contains negative numbers'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o"><=</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector contains values >1'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">isclose</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">sum</span><span class="p">(),</span> <span class="mi">1</span><span class="p">,</span> <span class="n">atol</span><span class="o">=</span><span class="n">toleranze</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector does not sum up to 1'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
<span class="k">return</span> <span class="kc">True</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="get_divergence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.get_divergence">[docs]</a><span class="k">def</span> <span class="nf">get_divergence</span><span class="p">(</span><span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]):</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">'HD'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">HellingerDistance</span>
|
|
||||||
<span class="k">elif</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">'topsoe'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">TopsoeDistance</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unknown divergence </span><span class="si">{</span><span class="n">divergence</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">callable</span><span class="p">(</span><span class="n">divergence</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">divergence</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'argument "divergence" not understood; use a str or a callable function'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="argmin_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.argmin_prevalence">[docs]</a><span class="k">def</span> <span class="nf">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'optim_minimize'</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'optim_minimize'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'linear_search'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'ternary_search'</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="optim_minimize"><a class="viewcode-back" href="../../quapy.html#quapy.functional.optim_minimize">[docs]</a><span class="k">def</span> <span class="nf">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex</span>
|
|
||||||
<span class="sd">    that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's</span>
|
|
||||||
<span class="sd"> SLSQP routine.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param loss: (callable) the function to minimize</span>
|
|
||||||
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
|
|
||||||
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">optimize</span>
|
|
||||||
|
|
||||||
<span class="c1"># the initial point is set as the uniform distribution</span>
|
|
||||||
<span class="n">uniform_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">fill_value</span><span class="o">=</span><span class="mi">1</span> <span class="o">/</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,))</span>
|
|
||||||
|
|
||||||
<span class="c1"># solutions are bounded to those contained in the unit-simplex</span>
|
|
||||||
<span class="n">bounds</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">((</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">))</span> <span class="c1"># values in [0,1]</span>
|
|
||||||
<span class="n">constraints</span> <span class="o">=</span> <span class="p">({</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'eq'</span><span class="p">,</span> <span class="s1">'fun'</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">x</span><span class="p">)})</span> <span class="c1"># values summing up to 1</span>
|
|
||||||
<span class="n">r</span> <span class="o">=</span> <span class="n">optimize</span><span class="o">.</span><span class="n">minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">x0</span><span class="o">=</span><span class="n">uniform_distribution</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'SLSQP'</span><span class="p">,</span> <span class="n">bounds</span><span class="o">=</span><span class="n">bounds</span><span class="p">,</span> <span class="n">constraints</span><span class="o">=</span><span class="n">constraints</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">r</span><span class="o">.</span><span class="n">x</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="linear_search"><a class="viewcode-back" href="../../quapy.html#quapy.functional.linear_search">[docs]</a><span class="k">def</span> <span class="nf">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring</span>
|
|
||||||
<span class="sd"> the range [0,1] stepping by 0.01. This search is inefficient, and is added only for completeness (some of the</span>
|
|
||||||
<span class="sd">    early methods in quantification literature used it, e.g., HDy). A more powerful alternative is `optim_minimize`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param loss: (callable) the function to minimize</span>
|
|
||||||
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
|
|
||||||
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">n_classes</span><span class="o">==</span><span class="mi">2</span><span class="p">,</span> <span class="s1">'linear search is only available for binary problems'</span>
|
|
||||||
|
|
||||||
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
|
||||||
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.0</span><span class="p">):</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">loss</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">,</span> <span class="n">prev</span><span class="p">]))</span>
|
|
||||||
<span class="k">if</span> <span class="n">min_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o"><</span> <span class="n">min_score</span><span class="p">:</span>
|
|
||||||
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="n">prev</span><span class="p">,</span> <span class="n">score</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev_selected</span><span class="p">,</span> <span class="n">prev_selected</span><span class="p">])</span></div>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,462 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method._kdey — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method._kdey</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method._kdey</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.neighbors</span> <span class="kn">import</span> <span class="n">KernelDensity</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeSoftQuantifier</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.metrics.pairwise</span> <span class="kn">import</span> <span class="n">rbf_kernel</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEBase"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase">[docs]</a><span class="k">class</span> <span class="nc">KDEBase</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Common ancestor for KDE-based methods. Implements some common routines.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">BANDWIDTH_METHOD</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'scott'</span><span class="p">,</span> <span class="s1">'silverman'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">_check_bandwidth</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Checks that the bandwidth parameter is correct</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param bandwidth: either a string (see BANDWIDTH_METHOD) or a float</span>
|
|
||||||
<span class="sd"> :return: nothing, but raises an exception for invalid values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">bandwidth</span> <span class="ow">in</span> <span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'invalid bandwidth, valid ones are </span><span class="si">{</span><span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span><span class="si">}</span><span class="s1"> or float values'</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">bandwidth</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">"the bandwith for KDEy should be in (0,1), since this method models the unit simplex"</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEBase.get_kde_function"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_kde_function">[docs]</a> <span class="k">def</span> <span class="nf">get_kde_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Wraps the KDE function from scikit-learn.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: data for which the density function is to be estimated</span>
|
|
||||||
<span class="sd"> :param bandwidth: the bandwidth of the kernel</span>
|
|
||||||
<span class="sd"> :return: a scikit-learn's KernelDensity object</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">KernelDensity</span><span class="p">(</span><span class="n">bandwidth</span><span class="o">=</span><span class="n">bandwidth</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEBase.pdf"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.pdf">[docs]</a> <span class="k">def</span> <span class="nf">pdf</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">kde</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd">        Wraps the density evaluation of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this</span>
|
|
||||||
<span class="sd"> function returns :math:`e^{s}`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param kde: a previously fit KDE function</span>
|
|
||||||
<span class="sd"> :param X: the data for which the density is to be estimated</span>
|
|
||||||
<span class="sd"> :return: np.ndarray with the densities</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kde</span><span class="o">.</span><span class="n">score_samples</span><span class="p">(</span><span class="n">X</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEBase.get_mixture_components"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_mixture_components">[docs]</a> <span class="k">def</span> <span class="nf">get_mixture_components</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns an array containing the mixture components, i.e., the KDE functions for each class.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param X: the data containing the covariates</span>
|
|
||||||
<span class="sd"> :param y: the class labels</span>
|
|
||||||
<span class="sd"> :param n_classes: integer, the number of classes</span>
|
|
||||||
<span class="sd"> :param bandwidth: float, the bandwidth of the kernel</span>
|
|
||||||
<span class="sd"> :return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">cat</span><span class="p">],</span> <span class="n">bandwidth</span><span class="p">)</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)]</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyML"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML">[docs]</a><span class="k">class</span> <span class="nc">KDEyML</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as</span>
|
|
||||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
|
||||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
|
||||||
<span class="sd">    the authors show that minimizing the distribution matching criterion for KLD is akin to performing</span>
|
|
||||||
<span class="sd"> maximum likelihood (ML).</span>
|
|
||||||
|
|
||||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
|
||||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
|
||||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
|
||||||
|
|
||||||
<span class="sd"> In KDEy-ML, the divergence is taken to be the Kullback-Leibler Divergence. This is equivalent to solving:</span>
|
|
||||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} -</span>
|
|
||||||
<span class="sd"> \\mathbb{E}_{q_{\\widetilde{U}}} \\left[ \\log \\boldsymbol{p}_{\\alpha}(\\widetilde{x}) \\right]`</span>
|
|
||||||
|
|
||||||
<span class="sd"> which corresponds to the maximum likelihood estimate.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
|
||||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
|
||||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
|
||||||
<span class="sd">        be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
|
||||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
|
||||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
|
||||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
|
||||||
<span class="sd"> on which the predictions are to be generated.</span>
|
|
||||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
|
||||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyML.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyML.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood</span>
|
|
||||||
<span class="sd"> of the data (i.e., that minimizes the negative log-likelihood)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param posteriors: instances in the sample converted into posterior probabilities</span>
|
|
||||||
<span class="sd"> :return: a vector of class prevalence estimates</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
|
|
||||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
|
|
||||||
<span class="n">test_densities</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_i</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">neg_loglikelihood</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
|
||||||
<span class="n">test_mixture_likelihood</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prev_i</span> <span class="o">*</span> <span class="n">dens_i</span> <span class="k">for</span> <span class="n">prev_i</span><span class="p">,</span> <span class="n">dens_i</span> <span class="ow">in</span> <span class="nb">zip</span> <span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">test_densities</span><span class="p">))</span>
|
|
||||||
<span class="n">test_loglikelihood</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">test_mixture_likelihood</span> <span class="o">+</span> <span class="n">epsilon</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">test_loglikelihood</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">neg_loglikelihood</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyHD"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD">[docs]</a><span class="k">class</span> <span class="nc">KDEyHD</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Distance (HD) as</span>
|
|
||||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
|
||||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
|
||||||
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
|
||||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
|
||||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
|
||||||
|
|
||||||
<span class="sd"> In KDEy-HD, the divergence is taken to be the squared Hellinger Distance, an f-divergence with corresponding</span>
|
|
||||||
<span class="sd"> f-generator function given by:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`f(u)=(\\sqrt{u}-1)^2`</span>
|
|
||||||
|
|
||||||
<span class="sd"> The authors proposed a Monte Carlo solution that relies on importance sampling:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\hat{D}_f(p||q)= \\frac{1}{t} \\sum_{i=1}^t f\\left(\\frac{p(x_i)}{q(x_i)}\\right) \\frac{q(x_i)}{r(x_i)}`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where the datapoints (trials) :math:`x_1,\\ldots,x_t\\sim_{\\mathrm{iid}} r` with :math:`r` the</span>
|
|
||||||
<span class="sd"> uniform distribution.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
|
||||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
|
||||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
|
||||||
<span class="sd"> be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
|
||||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
|
||||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
|
||||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
|
||||||
<span class="sd"> on which the predictions are to be generated.</span>
|
|
||||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
|
||||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
|
||||||
<span class="sd"> :param montecarlo_trials: number of Monte Carlo trials (default 10000)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span>
|
|
||||||
<span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">montecarlo_trials</span><span class="o">=</span><span class="mi">10000</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span> <span class="o">=</span> <span class="n">montecarlo_trials</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyHD.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">N</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span>
|
|
||||||
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span>
|
|
||||||
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="n">kde_i</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">N</span><span class="o">//</span><span class="n">n</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">rs</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_j</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_j</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># equiv. to (uniform @ self.reference_classwise_densities)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyHD.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="c1"># we retain all n*N examples (sampled from a mixture with uniform parameter), and then</span>
|
|
||||||
<span class="c1"># apply importance sampling (IS). In this version we compute D(p_alpha||q) with IS</span>
|
|
||||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">test_kde</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
<span class="n">test_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">test_kde</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">f_squared_hellinger</span><span class="p">(</span><span class="n">u</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">u</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span>
|
|
||||||
|
|
||||||
<span class="c1"># todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s1">'hd'</span><span class="p">:</span>
|
|
||||||
<span class="n">f</span> <span class="o">=</span> <span class="n">f_squared_hellinger</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'only squared HD is currently implemented'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
|
|
||||||
<span class="n">qs</span> <span class="o">=</span> <span class="n">test_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
|
|
||||||
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">+</span> <span class="n">epsilon</span>
|
|
||||||
<span class="n">iw</span> <span class="o">=</span> <span class="n">qs</span><span class="o">/</span><span class="n">rs</span> <span class="c1">#importance weights</span>
|
|
||||||
<span class="n">p_class</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
|
|
||||||
<span class="n">fracs</span> <span class="o">=</span> <span class="n">p_class</span><span class="o">/</span><span class="n">qs</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
|
||||||
<span class="c1"># ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs</span>
|
|
||||||
<span class="n">ps_div_qs</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">@</span> <span class="n">fracs</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span> <span class="n">f</span><span class="p">(</span><span class="n">ps_div_qs</span><span class="p">)</span> <span class="o">*</span> <span class="n">iw</span> <span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyCS"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS">[docs]</a><span class="k">class</span> <span class="nc">KDEyCS</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as</span>
|
|
||||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
|
||||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
|
||||||
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
|
||||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
|
||||||
|
|
||||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
|
||||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
|
||||||
|
|
||||||
<span class="sd"> In KDEy-CS, the divergence is taken to be the Cauchy-Schwarz divergence given by:</span>
|
|
||||||
|
|
||||||
<span class="sd"> :math:`\\mathcal{D}_{\\mathrm{CS}}(p||q)=-\\log\\left(\\frac{\\int p(x)q(x)dx}{\\sqrt{\\int p(x)^2dx \\int q(x)^2dx}}\\right)`</span>
|
|
||||||
|
|
||||||
<span class="sd"> The authors showed that this distribution matching admits a closed-form solution</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
|
||||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
|
||||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
|
||||||
<span class="sd"> be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
|
||||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
|
||||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
|
||||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
|
||||||
<span class="sd"> on which the predictions are to be generated.</span>
|
|
||||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="n">KDEBase</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyCS.gram_matrix_mix_sum"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.gram_matrix_mix_sum">[docs]</a> <span class="k">def</span> <span class="nf">gram_matrix_mix_sum</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="c1"># this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))</span>
|
|
||||||
<span class="c1"># to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y, where Sigma1 and Sigma2 are</span>
|
|
||||||
<span class="c1"># two "scalar matrices" (h^2)*I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth)</span>
|
|
||||||
<span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span>
|
|
||||||
<span class="n">variance</span> <span class="o">=</span> <span class="mi">2</span> <span class="o">*</span> <span class="p">(</span><span class="n">h</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">nD</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="n">gamma</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="n">variance</span><span class="p">)</span>
|
|
||||||
<span class="n">norm_factor</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(((</span><span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">)</span><span class="o">**</span><span class="n">nD</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">variance</span><span class="o">**</span><span class="p">(</span><span class="n">nD</span><span class="p">)))</span>
|
|
||||||
<span class="n">gram</span> <span class="o">=</span> <span class="n">norm_factor</span> <span class="o">*</span> <span class="n">rbf_kernel</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">gamma</span><span class="o">=</span><span class="n">gamma</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">gram</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyCS.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
|
||||||
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n</span><span class="p">)),</span> \
|
|
||||||
<span class="s1">'label name gaps not allowed in current implementation'</span>
|
|
||||||
|
|
||||||
<span class="c1"># counts_inv keeps track of the relative weight of each datapoint within its class</span>
|
|
||||||
<span class="c1"># (i.e., the weight in its KDE model)</span>
|
|
||||||
<span class="n">counts_inv</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">counts</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="c1"># tr_tr_sums corresponds to symbol \overline{B} in the paper</span>
|
|
||||||
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n</span><span class="p">,</span><span class="n">n</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">i</span> <span class="o">></span> <span class="n">j</span><span class="p">:</span>
|
|
||||||
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">j</span><span class="p">,</span><span class="n">i</span><span class="p">]</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">block</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">i</span><span class="p">],</span> <span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">j</span><span class="p">]</span> <span class="k">if</span> <span class="n">i</span><span class="o">!=</span><span class="n">j</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
|
||||||
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">block</span>
|
|
||||||
|
|
||||||
<span class="c1"># keep track of these data structures for the test phase</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span> <span class="o">=</span> <span class="n">P</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ytr</span> <span class="o">=</span> <span class="n">y</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">tr_tr_sums</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span> <span class="o">=</span> <span class="n">counts_inv</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="KDEyCS.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="n">Ptr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span>
|
|
||||||
<span class="n">Pte</span> <span class="o">=</span> <span class="n">posteriors</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ytr</span>
|
|
||||||
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span>
|
|
||||||
|
|
||||||
<span class="n">M</span><span class="p">,</span> <span class="n">nD</span> <span class="o">=</span> <span class="n">Pte</span><span class="o">.</span><span class="n">shape</span>
|
|
||||||
<span class="n">Minv</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span><span class="o">/</span><span class="n">M</span><span class="p">)</span> <span class="c1"># t in the paper</span>
|
|
||||||
<span class="n">n</span> <span class="o">=</span> <span class="n">Ptr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="c1"># becomes a constant that does not affect the optimization, no need to compute it</span>
|
|
||||||
<span class="c1"># partC = 0.5*np.log(self.gram_matrix_mix_sum(Pte) * Kinv * Kinv)</span>
|
|
||||||
|
|
||||||
<span class="c1"># tr_te_sums corresponds to \overline{a}*(1/Li)*(1/M) in the paper (note the constants</span>
|
|
||||||
<span class="c1"># are already aggregated to tr_te_sums, so these multiplications are not carried out</span>
|
|
||||||
<span class="c1"># at each iteration of the optimization phase)</span>
|
|
||||||
<span class="n">tr_te_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
|
||||||
<span class="n">tr_te_sums</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">Ptr</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">i</span><span class="p">],</span> <span class="n">Pte</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">alpha</span><span class="p">):</span>
|
|
||||||
<span class="c1"># called \overline{r} in the paper</span>
|
|
||||||
<span class="n">alpha_ratio</span> <span class="o">=</span> <span class="n">alpha</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span>
|
|
||||||
|
|
||||||
<span class="c1"># recall that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)</span>
|
|
||||||
<span class="n">partA</span> <span class="o">=</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_te_sums</span><span class="p">)</span> <span class="o">*</span> <span class="n">Minv</span><span class="p">)</span>
|
|
||||||
<span class="n">partB</span> <span class="o">=</span> <span class="mf">0.5</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_tr_sums</span> <span class="o">@</span> <span class="n">alpha_ratio</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">partA</span> <span class="o">+</span> <span class="n">partB</span> <span class="c1">#+ partC</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,520 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method._neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method._neural</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method._neural</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">random</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">torch</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">torch.nn</span> <span class="kn">import</span> <span class="n">MSELoss</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">torch.nn.functional</span> <span class="kn">import</span> <span class="n">relu</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">UPP</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer">[docs]</a><span class="k">class</span> <span class="nc">QuaNetTrainer</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implementation of `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_, a neural network for</span>
|
|
||||||
<span class="sd"> quantification. This implementation uses `PyTorch <https://pytorch.org/>`_ and can take advantage of GPU</span>
|
|
||||||
<span class="sd"> for speeding-up the training phase.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Example:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> import quapy as qp</span>
|
|
||||||
<span class="sd"> >>> from quapy.method.meta import QuaNet</span>
|
|
||||||
<span class="sd"> >>> from quapy.classification.neural import NeuralClassifierTrainer, CNNnet</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> # use samples of 100 elements</span>
|
|
||||||
<span class="sd"> >>> qp.environ['SAMPLE_SIZE'] = 100</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> # load the kindle dataset as text, and convert words to numerical indexes</span>
|
|
||||||
<span class="sd"> >>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)</span>
|
|
||||||
<span class="sd"> >>> qp.train.preprocessing.index(dataset, min_df=5, inplace=True)</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> # the text classifier is a CNN trained by NeuralClassifierTrainer</span>
|
|
||||||
<span class="sd"> >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)</span>
|
|
||||||
<span class="sd"> >>> classifier = NeuralClassifierTrainer(cnn, device='cuda')</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> # train QuaNet (QuaNet is an alias to QuaNetTrainer)</span>
|
|
||||||
<span class="sd"> >>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda')</span>
|
|
||||||
<span class="sd"> >>> model.fit(dataset.training)</span>
|
|
||||||
<span class="sd"> >>> estim_prevalence = model.quantify(dataset.test.instances)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data),</span>
|
|
||||||
<span class="sd"> `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and</span>
|
|
||||||
<span class="sd"> `transform` (i.e., that can generate embedded representations of the unlabelled instances).</span>
|
|
||||||
<span class="sd"> :param sample_size: integer, the sample size; default is None, meaning that the sample size should be</span>
|
|
||||||
<span class="sd"> taken from qp.environ["SAMPLE_SIZE"]</span>
|
|
||||||
<span class="sd"> :param n_epochs: integer, maximum number of training epochs</span>
|
|
||||||
<span class="sd"> :param tr_iter_per_poch: integer, number of training iterations before considering an epoch complete</span>
|
|
||||||
<span class="sd"> :param va_iter_per_poch: integer, number of validation iterations to perform after each epoch</span>
|
|
||||||
<span class="sd"> :param lr: float, the learning rate</span>
|
|
||||||
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cells</span>
|
|
||||||
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
|
|
||||||
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
|
|
||||||
<span class="sd"> quantification embedding</span>
|
|
||||||
<span class="sd"> :param bidirectional: boolean, indicates whether the LSTM is bidirectional or not</span>
|
|
||||||
<span class="sd"> :param qdrop_p: float, dropout probability</span>
|
|
||||||
<span class="sd"> :param patience: integer, number of epochs showing no improvement in the validation set before stopping the</span>
|
|
||||||
<span class="sd"> training phase (early stopping)</span>
|
|
||||||
<span class="sd"> :param checkpointdir: string, a path where to store models' checkpoints</span>
|
|
||||||
<span class="sd"> :param checkpointname: string (optional), the name of the model's checkpoint</span>
|
|
||||||
<span class="sd"> :param device: string, indicate "cpu" or "cuda"</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">classifier</span><span class="p">,</span>
|
|
||||||
<span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">n_epochs</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
|
||||||
<span class="n">tr_iter_per_poch</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span>
|
|
||||||
<span class="n">va_iter_per_poch</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
|
||||||
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
|
|
||||||
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
|
||||||
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
|
|
||||||
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
|
|
||||||
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
|
||||||
<span class="n">checkpointdir</span><span class="o">=</span><span class="s1">'../checkpoint'</span><span class="p">,</span>
|
|
||||||
<span class="n">checkpointname</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">'transform'</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce document embeddings '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'since it does not implement the method "transform"'</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">'predict_proba'</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce posterior probabilities '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'since it does not implement the method "predict_proba"'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span> <span class="o">=</span> <span class="n">n_epochs</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span> <span class="o">=</span> <span class="n">tr_iter_per_poch</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span> <span class="o">=</span> <span class="n">va_iter_per_poch</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">lr</span> <span class="o">=</span> <span class="n">lr</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'lstm_hidden_size'</span><span class="p">:</span> <span class="n">lstm_hidden_size</span><span class="p">,</span>
|
|
||||||
<span class="s1">'lstm_nlayers'</span><span class="p">:</span> <span class="n">lstm_nlayers</span><span class="p">,</span>
|
|
||||||
<span class="s1">'ff_layers'</span><span class="p">:</span> <span class="n">ff_layers</span><span class="p">,</span>
|
|
||||||
<span class="s1">'bidirectional'</span><span class="p">:</span> <span class="n">bidirectional</span><span class="p">,</span>
|
|
||||||
<span class="s1">'qdrop_p'</span><span class="p">:</span> <span class="n">qdrop_p</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
|
|
||||||
<span class="k">if</span> <span class="n">checkpointname</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
|
||||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
|
||||||
<span class="n">checkpointname</span> <span class="o">=</span> <span class="s1">'QuaNet-'</span><span class="o">+</span><span class="n">random_code</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span> <span class="o">=</span> <span class="n">checkpointdir</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">checkpointname</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Trains QuaNet.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: the training data on which to train QuaNet. If `fit_classifier=True`, the data will be split in</span>
|
|
||||||
<span class="sd"> 40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If</span>
|
|
||||||
<span class="sd"> `fit_classifier=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively.</span>
|
|
||||||
<span class="sd"> :param fit_classifier: if True, trains the classifier on a split containing 40% of the data</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
|
||||||
<span class="n">classifier_data</span><span class="p">,</span> <span class="n">unused_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.4</span><span class="p">)</span>
|
|
||||||
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">unused_data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span> <span class="c1"># 0.66 split of 60% makes 40% and 20%</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">classifier_data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">classifier_data</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># estimate the hard and soft stats tpr and fpr of the classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_prev</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="c1"># compute the posterior probabilities of the instances</span>
|
|
||||||
<span class="n">valid_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># turn instances' original representations into embeddings</span>
|
|
||||||
<span class="n">valid_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">valid_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
|
|
||||||
<span class="n">train_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">train_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'cc'</span><span class="p">:</span> <span class="n">CC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
|
|
||||||
<span class="s1">'acc'</span><span class="p">:</span> <span class="n">ACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
|
|
||||||
<span class="s1">'pcc'</span><span class="p">:</span> <span class="n">PCC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
|
|
||||||
<span class="s1">'pacc'</span><span class="p">:</span> <span class="n">PACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
<span class="k">if</span> <span class="n">classifier_data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">[</span><span class="s1">'emq'</span><span class="p">]</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">classifier_data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span>
|
|
||||||
<span class="s1">'tr-loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="s1">'va-loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="s1">'tr-mae'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="s1">'va-mae'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="n">nQ</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">)</span>
|
|
||||||
<span class="n">nC</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span> <span class="o">=</span> <span class="n">QuaNetModule</span><span class="p">(</span>
|
|
||||||
<span class="n">doc_embedding_size</span><span class="o">=</span><span class="n">train_data_embed</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
|
|
||||||
<span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span>
|
|
||||||
<span class="n">stats_size</span><span class="o">=</span><span class="n">nQ</span><span class="o">*</span><span class="n">nC</span><span class="p">,</span>
|
|
||||||
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span> <span class="k">if</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span>
|
|
||||||
<span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">lr</span><span class="p">)</span>
|
|
||||||
<span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span>
|
|
||||||
|
|
||||||
<span class="k">for</span> <span class="n">epoch_i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">train_data_embed</span><span class="p">,</span> <span class="n">train_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">valid_data_embed</span><span class="p">,</span> <span class="n">valid_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-loss'</span><span class="p">],</span> <span class="n">epoch_i</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
|
||||||
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'training ended by patience exhausted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'for epoch </span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
|
|
||||||
<span class="k">break</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_get_aggregative_estims</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">):</span>
|
|
||||||
<span class="n">label_predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">quantifier</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">posteriors</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">)</span> <span class="k">else</span> <span class="n">label_predictions</span>
|
|
||||||
<span class="n">prevs_estim</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">quantifier</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">predictions</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="c1"># there is no real need for adding static estims like the TPR or FPR from training since those are constant</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">prevs_estim</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
|
||||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
||||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">embeddings</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cuda'</span><span class="p">):</span>
|
|
||||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
|
|
||||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalence</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">iterations</span><span class="p">,</span> <span class="n">epoch</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="p">):</span>
|
|
||||||
<span class="n">mse_loss</span> <span class="o">=</span> <span class="n">MSELoss</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
<span class="n">losses</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">mae_errors</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">sampler</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span>
|
|
||||||
<span class="n">data</span><span class="p">,</span>
|
|
||||||
<span class="n">sample_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span>
|
|
||||||
<span class="n">repeats</span><span class="o">=</span><span class="n">iterations</span><span class="p">,</span>
|
|
||||||
<span class="n">random_state</span><span class="o">=</span><span class="kc">None</span> <span class="k">if</span> <span class="n">train</span> <span class="k">else</span> <span class="mi">0</span> <span class="c1"># different samples during train, same samples during validation</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="n">pbar</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">sampler</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">sampler</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
|
|
||||||
<span class="k">for</span> <span class="n">it</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pbar</span><span class="p">):</span>
|
|
||||||
<span class="n">sample_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
|
||||||
<span class="n">sample_posteriors</span> <span class="o">=</span> <span class="n">posteriors</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
|
||||||
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">sample_posteriors</span><span class="p">)</span>
|
|
||||||
<span class="n">ptrue</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">([</span><span class="n">sample_data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
|
||||||
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
|
||||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
|
||||||
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
|
||||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
|
||||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
|
||||||
<span class="n">mae_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="n">mse</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
|
||||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">mae_errors</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr-loss'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr-mae'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-loss'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-mae'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
|
||||||
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[QuaNet] '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> [it=</span><span class="si">{</span><span class="n">it</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">iterations</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1">'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'tr-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr-loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> tr-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr-mae"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="se">\t</span><span class="s1">'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'val-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va-loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> val-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va-mae"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'patience=</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="n">classifier_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
|
||||||
<span class="n">classifier_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__'</span><span class="o">+</span><span class="n">k</span><span class="p">:</span><span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span><span class="n">v</span> <span class="ow">in</span> <span class="n">classifier_params</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
|
||||||
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="n">classifier_params</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">}</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
|
||||||
<span class="n">learner_params</span> <span class="o">=</span> <span class="p">{}</span>
|
|
||||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">parameters</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
|
||||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
|
||||||
<span class="k">elif</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">):</span>
|
|
||||||
<span class="n">learner_params</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)]</span> <span class="o">=</span> <span class="n">val</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'unknown parameter '</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">learner_params</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quanet_params</span><span class="p">,</span> <span class="n">learner_params</span><span class="p">):</span>
|
|
||||||
<span class="n">quanet_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">quanet_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
|
||||||
<span class="n">learner_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">learner_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
|
||||||
<span class="n">intersection</span> <span class="o">=</span> <span class="n">quanet_keys</span><span class="o">.</span><span class="n">intersection</span><span class="p">(</span><span class="n">learner_keys</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">intersection</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the use of parameters </span><span class="si">{</span><span class="n">intersection</span><span class="si">}</span><span class="s1"> is ambiguous sine those can refer to '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'the parameters of QuaNet or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint">[docs]</a> <span class="k">def</span> <span class="nf">clean_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Removes the checkpoint</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint_dir"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir">[docs]</a> <span class="k">def</span> <span class="nf">clean_checkpoint_dir</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Removes anything contained in the checkpoint directory</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">shutil</span>
|
|
||||||
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mae_loss"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.mae_loss">[docs]</a><span class="k">def</span> <span class="nf">mae_loss</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Torch-like wrapper for the Mean Absolute Error</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param output: predictions</span>
|
|
||||||
<span class="sd"> :param target: ground truth values</span>
|
|
||||||
<span class="sd"> :return: mean absolute error loss</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">output</span> <span class="o">-</span> <span class="n">target</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetModule"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule">[docs]</a><span class="k">class</span> <span class="nc">QuaNetModule</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements the `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_ forward pass.</span>
|
|
||||||
<span class="sd"> See :class:`QuaNetTrainer` for training QuaNet.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param doc_embedding_size: integer, the dimensionality of the document embeddings</span>
|
|
||||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
|
||||||
<span class="sd"> :param stats_size: integer, number of statistics estimated by simple quantification methods</span>
|
|
||||||
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cell</span>
|
|
||||||
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
|
|
||||||
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
|
|
||||||
<span class="sd"> quantification embedding</span>
|
|
||||||
<span class="sd"> :param bidirectional: boolean, whether or not to use bidirectional LSTM</span>
|
|
||||||
<span class="sd"> :param qdrop_p: float, dropout probability</span>
|
|
||||||
<span class="sd"> :param order_by: integer, class for which the document embeddings are to be sorted</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">doc_embedding_size</span><span class="p">,</span>
|
|
||||||
<span class="n">n_classes</span><span class="p">,</span>
|
|
||||||
<span class="n">stats_size</span><span class="p">,</span>
|
|
||||||
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
|
||||||
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
|
|
||||||
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
|
|
||||||
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="o">=</span> <span class="n">order_by</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">=</span> <span class="n">lstm_hidden_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">=</span> <span class="n">lstm_nlayers</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="o">=</span> <span class="n">bidirectional</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span> <span class="o">=</span> <span class="n">qdrop_p</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">doc_embedding_size</span> <span class="o">+</span> <span class="n">n_classes</span><span class="p">,</span> <span class="c1"># +n_classes stands for the posterior probs. (concatenated)</span>
|
|
||||||
<span class="n">lstm_hidden_size</span><span class="p">,</span> <span class="n">lstm_nlayers</span><span class="p">,</span> <span class="n">bidirectional</span><span class="o">=</span><span class="n">bidirectional</span><span class="p">,</span>
|
|
||||||
<span class="n">dropout</span><span class="o">=</span><span class="n">qdrop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">lstm_output_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span>
|
|
||||||
<span class="n">ff_input_size</span> <span class="o">=</span> <span class="n">lstm_output_size</span> <span class="o">+</span> <span class="n">stats_size</span>
|
|
||||||
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">ff_input_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">lin_size</span> <span class="ow">in</span> <span class="n">ff_layers</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">lin_size</span><span class="p">))</span>
|
|
||||||
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">lin_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cuda'</span><span class="p">)</span> <span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span> <span class="k">else</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cpu'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">directions</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
|
|
||||||
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
|
||||||
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
|
|
||||||
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="QuaNetModule.forward"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule.forward">[docs]</a> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">statistics</span><span class="p">):</span>
|
|
||||||
<span class="n">device</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span>
|
|
||||||
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
<span class="n">statistics</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">statistics</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span><span class="p">])</span>
|
|
||||||
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">doc_embeddings</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
|
|
||||||
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">doc_posteriors</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># the entire set represents only one instance in quapy contexts, and so the batch_size=1</span>
|
|
||||||
<span class="c1"># the shape should be (1, number-of-instances, embedding-size + n_classes)</span>
|
|
||||||
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">embeded_posteriors</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">flatten_parameters</span><span class="p">()</span>
|
|
||||||
<span class="n">_</span><span class="p">,</span> <span class="p">(</span><span class="n">rnn_hidden</span><span class="p">,</span><span class="n">_</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embeded_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_init_hidden</span><span class="p">())</span>
|
|
||||||
<span class="n">rnn_hidden</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
|
||||||
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">quant_embedding</span><span class="p">,</span> <span class="n">statistics</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="n">quant_embedding</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">linear</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="p">:</span>
|
|
||||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">relu</span><span class="p">(</span><span class="n">linear</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)))</span>
|
|
||||||
|
|
||||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">prevalence</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,364 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method._threshold_optim — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method._threshold_optim</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method._threshold_optim</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">abstractmethod</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BinaryAggregativeQuantifier</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization">[docs]</a><span class="k">class</span> <span class="nc">ThresholdOptimization</span><span class="p">(</span><span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
<span class="sd"> The different variants are based on different heuristics for choosing a decision threshold</span>
|
|
||||||
<span class="sd"> that would allow for more true positives and many more false positives, on the grounds this</span>
|
|
||||||
<span class="sd"> would deliver larger denominators.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.condition">[docs]</a> <span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements the criterion according to which the threshold should be selected.</span>
|
|
||||||
<span class="sd"> This function should return the (float) score to be minimized.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param tpr: float, true positive rate</span>
|
|
||||||
<span class="sd"> :param fpr: float, false positive rate</span>
|
|
||||||
<span class="sd"> :return: float, a score for the given `tpr` and `fpr`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization.discard"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.discard">[docs]</a> <span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Indicates whether a combination of tpr and fpr should be discarded</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param tpr: float, true positive rate</span>
|
|
||||||
<span class="sd"> :param fpr: float, false positive rate</span>
|
|
||||||
<span class="sd"> :return: true if the combination is to be discarded, false otherwise</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_eval_candidate_thresholds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Seeks for the best `tpr` and `fpr` according to the score obtained at different</span>
|
|
||||||
<span class="sd"> decision thresholds. The scoring function is implemented in function `_condition`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param decision_scores: array-like with the classification scores</span>
|
|
||||||
<span class="sd"> :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation)</span>
|
|
||||||
<span class="sd"> :return: best `tpr` and `fpr` and `threshold` according to `_condition`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">candidate_thresholds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">candidates</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">candidate_threshold</span> <span class="ow">in</span> <span class="n">candidate_thresholds</span><span class="p">:</span>
|
|
||||||
<span class="n">y_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="mi">1</span> <span class="o">*</span> <span class="p">(</span><span class="n">decision_scores</span> <span class="o">>=</span> <span class="n">candidate_threshold</span><span class="p">)]</span>
|
|
||||||
<span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_table</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">)</span>
|
|
||||||
<span class="n">tpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_tpr</span><span class="p">(</span><span class="n">TP</span><span class="p">,</span> <span class="n">FN</span><span class="p">)</span>
|
|
||||||
<span class="n">fpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_fpr</span><span class="p">(</span><span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">discard</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">):</span>
|
|
||||||
<span class="n">candidate_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">condition</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span>
|
|
||||||
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">candidate_threshold</span><span class="p">])</span>
|
|
||||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">candidate_score</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="c1"># if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard</span>
|
|
||||||
<span class="c1"># classify & count; this is akin to assign tpr=1, fpr=0, threshold=0</span>
|
|
||||||
<span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span>
|
|
||||||
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span><span class="p">])</span>
|
|
||||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">candidates</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span>
|
|
||||||
<span class="n">candidates</span> <span class="o">=</span> <span class="n">candidates</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)]</span> <span class="c1"># sort candidates by candidate_score</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">candidates</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization.aggregate_with_threshold"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold">[docs]</a> <span class="k">def</span> <span class="nf">aggregate_with_threshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">,</span> <span class="n">tprs</span><span class="p">,</span> <span class="n">fprs</span><span class="p">,</span> <span class="n">thresholds</span><span class="p">):</span>
|
|
||||||
<span class="c1"># This function performs the adjusted count for given tpr, fpr, and threshold.</span>
|
|
||||||
<span class="c1"># Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length > 1</span>
|
|
||||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">[:,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">>=</span> <span class="n">thresholds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevs_estims</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">tprs</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">prevs_estims</span><span class="p">,</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevs_estims</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_compute_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
|
|
||||||
<span class="n">TP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="n">FP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="n">FN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="n">TN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_compute_tpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="mi">1</span>
|
|
||||||
<span class="k">return</span> <span class="n">TP</span> <span class="o">/</span> <span class="p">(</span><span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_compute_fpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="mi">0</span>
|
|
||||||
<span class="k">return</span> <span class="n">FP</span> <span class="o">/</span> <span class="p">(</span><span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
|
||||||
<span class="c1"># the standard behavior is to keep the best threshold only</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ThresholdOptimization.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="c1"># the standard behavior is to compute the adjusted count using the best threshold found</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="T50"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50">[docs]</a><span class="k">class</span> <span class="nc">T50</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
|
||||||
<span class="sd"> for the threshold that makes `tpr` closest to 0.5.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="T50.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="mf">0.5</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MAX"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX">[docs]</a><span class="k">class</span> <span class="nc">MAX</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
|
||||||
<span class="sd"> for the threshold that maximizes `tpr-fpr`.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MAX.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="c1"># MAX strives to maximize (tpr - fpr), which is equivalent to minimize (fpr - tpr)</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">fpr</span> <span class="o">-</span> <span class="n">tpr</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="X"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X">[docs]</a><span class="k">class</span> <span class="nc">X</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
|
||||||
<span class="sd"> for the threshold that yields `tpr=1-fpr`.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="X.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">+</span> <span class="n">fpr</span><span class="p">))</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS">[docs]</a><span class="k">class</span> <span class="nc">MS</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Median Sweep. Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates</span>
|
|
||||||
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of them all.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="mi">1</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
|
||||||
<span class="c1"># keeps all candidates</span>
|
|
||||||
<span class="n">tprs_fprs_thresholds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">tprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">fprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span><span class="o">==</span><span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevalences</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS2"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2">[docs]</a><span class="k">class</span> <span class="nc">MS2</span><span class="p">(</span><span class="n">MS</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Median Sweep 2. Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
|
||||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
|
||||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates</span>
|
|
||||||
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of them for cases in</span>
|
|
||||||
<span class="sd"> which `tpr-fpr>0.25`.</span>
|
|
||||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
|
||||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
|
||||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
|
||||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
|
||||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults to 5), or as a</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MS2.discard"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2.discard">[docs]</a> <span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span><span class="o">-</span><span class="n">fpr</span><span class="p">)</span> <span class="o"><=</span> <span class="mf">0.25</span></div></div>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,212 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method.base</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method.base</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="c1"># Base Quantifier abstract class</span>
|
|
||||||
<span class="c1"># ------------------------------------</span>
|
|
||||||
<div class="viewcode-block" id="BaseQuantifier"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier">[docs]</a><span class="k">class</span> <span class="nc">BaseQuantifier</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract Quantifier. A quantifier is defined as an object of a class that implements the method :meth:`fit` on</span>
|
|
||||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, the method :meth:`quantify`, and the :meth:`set_params` and</span>
|
|
||||||
<span class="sd"> :meth:`get_params` for model selection (see :meth:`quapy.model_selection.GridSearchQ`)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="BaseQuantifier.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.fit">[docs]</a> <span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Trains a quantifier.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="BaseQuantifier.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.quantify">[docs]</a> <span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generate class prevalence estimates for the sample's instances</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: array-like</span>
|
|
||||||
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="BinaryQuantifier"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BinaryQuantifier">[docs]</a><span class="k">class</span> <span class="nc">BinaryQuantifier</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes</span>
|
|
||||||
<span class="sd"> (typically, to be interpreted as one class and its complement).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_check_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">quantifier_name</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> works only on problems of binary classification. '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'Use the class OneVsAll to enable </span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> work on single-label data.'</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OneVsAll"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAll">[docs]</a><span class="k">class</span> <span class="nc">OneVsAll</span><span class="p">:</span>
|
|
||||||
<span class="k">pass</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="newOneVsAll"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.newOneVsAll">[docs]</a><span class="k">def</span> <span class="nf">newOneVsAll</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier'</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">OneVsAllGeneric</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OneVsAllGeneric"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric">[docs]</a><span class="k">class</span> <span class="nc">OneVsAllGeneric</span><span class="p">(</span><span class="n">OneVsAll</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary</span>
|
|
||||||
<span class="sd"> quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier'</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[warning] the quantifier seems to be an instance of qp.method.aggregative.AggregativeQuantifier; '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'you might prefer instantiating </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span> <span class="o">=</span> <span class="n">binary_quantifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OneVsAllGeneric.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> expect non-binary data'</span>
|
|
||||||
<span class="k">assert</span> <span class="n">fit_classifier</span> <span class="o">==</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'fit_classifier must be True'</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span> <span class="o">=</span> <span class="p">{</span><span class="n">c</span><span class="p">:</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_fit</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
|
||||||
<span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span><span class="p">)(</span>
|
|
||||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">c</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OneVsAllGeneric.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_predict</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_binary_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">X</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_binary_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
|
|
||||||
<span class="n">bindata</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">c</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">])</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">bindata</span><span class="p">)</span></div>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,796 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method.meta — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method.meta</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method.meta</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span><span class="p">,</span> <span class="n">make_scorer</span><span class="p">,</span> <span class="n">accuracy_score</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchCV</span><span class="p">,</span> <span class="n">cross_val_predict</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">functional</span> <span class="k">as</span> <span class="n">F</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">CC</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
|
|
||||||
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_neural</span>
|
|
||||||
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
|
|
||||||
<span class="n">_neural</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">_neural</span><span class="p">:</span>
|
|
||||||
<span class="n">QuaNet</span> <span class="o">=</span> <span class="n">_neural</span><span class="o">.</span><span class="n">QuaNetTrainer</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">QuaNet</span> <span class="o">=</span> <span class="s2">"QuaNet is not available due to missing torch package"</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator2"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2">[docs]</a><span class="k">class</span> <span class="nc">MedianEstimator2</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
|
|
||||||
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
|
|
||||||
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
|
|
||||||
<span class="sd"> i.e., in cases of binary quantification.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
|
|
||||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
|
||||||
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parllel workes</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator2.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator2.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator2.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator2.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator">[docs]</a><span class="k">class</span> <span class="nc">MedianEstimator</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
|
|
||||||
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
|
|
||||||
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
|
|
||||||
<span class="sd"> i.e., in cases of binary quantification.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
|
|
||||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
|
||||||
<span class="sd"> :param param_grid: the grid of parameters over which the median will be computed</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_fit_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="n">cls_params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_fit_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">),</span> <span class="n">q_params</span><span class="p">),</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">models_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_classifier</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">cls_configs</span><span class="p">),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
|
||||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_configs</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
|
|
||||||
<span class="n">models_preds</span> <span class="o">=</span> <span class="p">[(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)]</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_aggregation</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">setup</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">setup</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">models_preds</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
|
||||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
|
||||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MedianEstimator.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
|
|
||||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
|
||||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Ensemble"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble">[docs]</a><span class="k">class</span> <span class="nc">Ensemble</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="n">VALID_POLICIES</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'ave'</span><span class="p">,</span> <span class="s1">'ptr'</span><span class="p">,</span> <span class="s1">'ds'</span><span class="p">}</span> <span class="o">|</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span>
|
|
||||||
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implementation of the Ensemble methods for quantification described by </span>
|
|
||||||
<span class="sd"> `Pérez-Gállego et al., 2017 <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
|
||||||
<span class="sd"> and</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
<span class="sd"> The policies implemented include:</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> - Average (`policy='ave'`): computes class prevalence estimates as the average of the estimates </span>
|
|
||||||
<span class="sd"> returned by the base quantifiers.</span>
|
|
||||||
<span class="sd"> - Training Prevalence (`policy='ptr'`): applies a dynamic selection to the ensemble’s members by retaining only </span>
|
|
||||||
<span class="sd"> those members such that the class prevalence values in the samples they use as training set are closest to </span>
|
|
||||||
<span class="sd"> preliminary class prevalence estimates computed as the average of the estimates of all the members. The final </span>
|
|
||||||
<span class="sd"> estimate is recomputed by considering only the selected members.</span>
|
|
||||||
<span class="sd"> - Distribution Similarity (`policy='ds'`): performs a dynamic selection of base members by retaining</span>
|
|
||||||
<span class="sd"> the members trained on samples whose distribution of posterior probabilities is closest, in terms of the</span>
|
|
||||||
<span class="sd"> Hellinger Distance, to the distribution of posterior probabilities in the test sample</span>
|
|
||||||
<span class="sd"> - Accuracy (`policy='<valid error name>'`): performs a static selection of the ensemble members by</span>
|
|
||||||
<span class="sd"> retaining those that minimize a quantification error measure, which is passed as an argument.</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> Example:</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> >>> model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)</span>
|
|
||||||
<span class="sd"> </span>
|
|
||||||
<span class="sd"> :param quantifier: base quantification member of the ensemble </span>
|
|
||||||
<span class="sd"> :param size: number of members</span>
|
|
||||||
<span class="sd"> :param red_size: number of members to retain after selection (depending on the policy)</span>
|
|
||||||
<span class="sd"> :param min_pos: minimum number of positive instances to consider a sample as valid </span>
|
|
||||||
<span class="sd"> :param policy: the selection policy; available policies include: `ave` (default), `ptr`, `ds`, and accuracy </span>
|
|
||||||
<span class="sd"> (which is instantiated via a valid error name, e.g., `mae`)</span>
|
|
||||||
<span class="sd"> :param max_sample_size: maximum number of instances to consider in the samples (set to None </span>
|
|
||||||
<span class="sd"> to indicate no limit, default)</span>
|
|
||||||
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
|
|
||||||
<span class="sd"> validation split, or a :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers (default 1)</span>
|
|
||||||
<span class="sd"> :param verbose: set to True (default is False) to get some information in standard output</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">size</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
|
||||||
<span class="n">red_size</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span>
|
|
||||||
<span class="n">min_pos</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
|
|
||||||
<span class="n">policy</span><span class="o">=</span><span class="s1">'ave'</span><span class="p">,</span>
|
|
||||||
<span class="n">max_sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">val_split</span><span class="p">:</span><span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="n">policy</span> <span class="ow">in</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'unknown policy=</span><span class="si">{</span><span class="n">policy</span><span class="si">}</span><span class="s1">; valid are </span><span class="si">{</span><span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">assert</span> <span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_sample_size</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> \
|
|
||||||
<span class="s1">'wrong value for max_sample_size; set it to a positive number or None'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">quantifier</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">=</span> <span class="n">min_pos</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">red_size</span> <span class="o">=</span> <span class="n">red_size</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">=</span> <span class="n">policy</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="o">=</span> <span class="n">max_sample_size</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Ensemble]'</span> <span class="o">+</span> <span class="n">msg</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Ensemble.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">val_split</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'ds policy is only defined for binary quantification, but this dataset is not binary'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">val_split</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
|
|
||||||
|
|
||||||
<span class="c1"># randomly chooses the prevalences for each member of the ensemble (preventing classes with less than</span>
|
|
||||||
<span class="c1"># min_pos positive examples)</span>
|
|
||||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">min_val</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">/</span> <span class="n">sample_size</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)]</span>
|
|
||||||
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span><span class="p">:</span>
|
|
||||||
<span class="c1"># precompute the training posterior probabilities</span>
|
|
||||||
<span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy_get_posteriors</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">is_static_policy</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">args</span> <span class="o">=</span> <span class="p">(</span>
|
|
||||||
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">is_static_policy</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="n">_delayed_new_instance</span><span class="p">,</span>
|
|
||||||
<span class="n">tqdm</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="s1">'fitting ensamble'</span><span class="p">,</span> <span class="n">total</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="k">else</span> <span class="n">args</span><span class="p">,</span>
|
|
||||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># static selection policy (the name of a quantification-oriented error function to minimize)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_accuracy_policy</span><span class="p">(</span><span class="n">error_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="s1">'Fit [Done]'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Ensemble.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span><span class="n">_delayed_quantify</span><span class="p">,</span> <span class="p">((</span><span class="n">Qi</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">Qi</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ptr'</span><span class="p">:</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ptr_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span><span class="p">:</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Ensemble.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
|
|
||||||
<span class="sd"> with the abstract class).</span>
|
|
||||||
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
|
|
||||||
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
|
|
||||||
<span class="sd"> classification (not recommended).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param parameters: dictionary</span>
|
|
||||||
<span class="sd"> :return: raises an Exception</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> should not be used within GridSearchQ; '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'l optimized for classification (not recommended).'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Ensemble.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
|
|
||||||
<span class="sd"> with the abstract class).</span>
|
|
||||||
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
|
|
||||||
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
|
|
||||||
<span class="sd"> classification (not recommended).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param deep: for compatibility with scikit-learn</span>
|
|
||||||
<span class="sd"> :return: raises an Exception</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_accuracy_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error_name</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Selects the red_size best performant quantifiers in a static way (i.e., dropping all non-selected instances).</span>
|
|
||||||
<span class="sd"> For each model in the ensemble, the performance is measured in terms of _error_name_ on the quantification of</span>
|
|
||||||
<span class="sd"> the samples used for training the rest of the models in the ensemble.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.evaluation</span> <span class="kn">import</span> <span class="n">evaluate_on_samples</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_name</span><span class="p">)</span>
|
|
||||||
<span class="n">tests</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
|
||||||
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">model</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">):</span>
|
|
||||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">evaluate_on_samples</span><span class="p">(</span><span class="n">model</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">tests</span><span class="p">[:</span><span class="n">i</span><span class="p">]</span> <span class="o">+</span> <span class="n">tests</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">error</span><span class="p">))</span>
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">_select_k</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_ptr_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Selects the predictions made by models that have been trained on samples with a prevalence that is most similar</span>
|
|
||||||
<span class="sd"> to a first approximation of the test prevalence as made by all models in the ensemble.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">test_prev_estim</span> <span class="o">=</span> <span class="n">predictions</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">tr_prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
|
||||||
<span class="n">ptr_differences</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mse</span><span class="p">(</span><span class="n">ptr_i</span><span class="p">,</span> <span class="n">test_prev_estim</span><span class="p">)</span> <span class="k">for</span> <span class="n">ptr_i</span> <span class="ow">in</span> <span class="n">tr_prevs</span><span class="p">]</span>
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">ptr_differences</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_ds_policy_get_posteriors</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> In the original article, there are some aspects regarding this method that are not mentioned. The paper says</span>
|
|
||||||
<span class="sd"> that the distribution of posterior probabilities from training and test examples is compared by means of the</span>
|
|
||||||
<span class="sd"> Hellinger Distance. However, how these posterior probabilities are generated is not specified. In the article,</span>
|
|
||||||
<span class="sd"> a Logistic Regressor (LR) is used as the classifier device and that could be used for this purpose. However, in</span>
|
|
||||||
<span class="sd"> general, a Quantifier is not necessarily an instance of Aggreggative Probabilistic Quantifiers, and so, that the</span>
|
|
||||||
<span class="sd"> quantifier builds on top of a probabilistic classifier cannot be given for granted. Additionally, it would not</span>
|
|
||||||
<span class="sd"> be correct to generate the posterior probabilities for training instances that have concurred in training the</span>
|
|
||||||
<span class="sd"> classifier that generates them.</span>
|
|
||||||
|
|
||||||
<span class="sd"> This function thus generates the posterior probabilities for all training documents in a cross-validation way,</span>
|
|
||||||
<span class="sd"> using LR with hyperparameters that have previously been optimized via grid search in 5FCV.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: a LabelledCollection</span>
|
|
||||||
<span class="sd"> :return: (P,f,) where P is an ndarray containing the posterior probabilities of the training data, generated via</span>
|
|
||||||
<span class="sd"> cross-validation and using an optimized LR, and the function to be used in order to generate posterior</span>
|
|
||||||
<span class="sd"> probabilities for test instances.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
|
|
||||||
<span class="n">lr_base</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">class_weight</span><span class="o">=</span><span class="s1">'balanced'</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">1000</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">9</span><span class="p">)}</span>
|
|
||||||
<span class="n">optim</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">lr_base</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span><span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'predict_proba'</span><span class="p">)</span>
|
|
||||||
<span class="n">posteriors_generator</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="o">.</span><span class="n">predict_proba</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">posteriors_generator</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_ds_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
|
|
||||||
<span class="n">test_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="n">test_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">test_posteriors</span><span class="p">)</span>
|
|
||||||
<span class="n">tr_distributions</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
|
||||||
<span class="n">dist</span> <span class="o">=</span> <span class="p">[</span><span class="n">F</span><span class="o">.</span><span class="n">HellingerDistance</span><span class="p">(</span><span class="n">tr_dist_i</span><span class="p">,</span> <span class="n">test_distribution</span><span class="p">)</span> <span class="k">for</span> <span class="n">tr_dist_i</span> <span class="ow">in</span> <span class="n">tr_distributions</span><span class="p">]</span>
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">dist</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Indicates that the quantifier is not aggregative.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: False</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Indicates that the quantifier is not probabilistic.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: False</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="get_probability_distribution"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.get_probability_distribution">[docs]</a><span class="k">def</span> <span class="nf">get_probability_distribution</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">8</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Gets a histogram out of the posterior probabilities (only for the binary case).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param posterior_probabilities: array-like of shape `(n_instances, 2,)`</span>
|
|
||||||
<span class="sd"> :param bins: integer</span>
|
|
||||||
<span class="sd"> :return: `np.ndarray` with the relative frequencies for each bin (for the positive class only)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">posterior_probabilities</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">'the posterior probabilities do not seem to be for a binary problem'</span>
|
|
||||||
<span class="n">posterior_probabilities</span> <span class="o">=</span> <span class="n">posterior_probabilities</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span> <span class="c1"># take the positive posteriors only</span>
|
|
||||||
<span class="n">distribution</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">distribution</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_select_k</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="p">[</span><span class="n">elements</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span> <span class="k">for</span> <span class="n">idx</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[:</span><span class="n">k</span><span class="p">]]</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_new_instance</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">keep_samples</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">fit-start for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">, sample_size=</span><span class="si">{</span><span class="n">sample_size</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val_split</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
|
||||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">val_split</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'val_split should be in (0,1)'</span>
|
|
||||||
<span class="n">data</span><span class="p">,</span> <span class="n">val_split</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mi">1</span> <span class="o">-</span> <span class="n">val_split</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample_index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prev</span><span class="p">)</span>
|
|
||||||
<span class="n">sample</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">sample_index</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">val_split</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tr_prevalence</span> <span class="o">=</span> <span class="n">sample</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="n">tr_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">posteriors</span><span class="p">[</span><span class="n">sample_index</span><span class="p">])</span> <span class="k">if</span> <span class="p">(</span><span class="n">posteriors</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">\--fit-ended for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">tr_prevalence</span><span class="p">,</span> <span class="n">tr_distribution</span><span class="p">,</span> <span class="n">sample</span> <span class="k">if</span> <span class="n">keep_samples</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_delayed_quantify</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">quantifier</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="k">return</span> <span class="n">quantifier</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="p">,</span> <span class="n">min_val</span><span class="p">,</span> <span class="n">max_trials</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a uniform sampling from the ndim-dimensional simplex but guarantees that all dimensions</span>
|
|
||||||
<span class="sd"> are >= min_val (for min_val>0, this makes the sampling not truly uniform)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param ndim: number of dimensions of the simplex</span>
|
|
||||||
<span class="sd"> :param min_val: minimum class prevalence allowed. If greater than or equal to 1/ndim, a ValueError will be thrown since</span>
|
|
||||||
<span class="sd"> there is no possible solution.</span>
|
|
||||||
<span class="sd"> :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex</span>
|
|
||||||
<span class="sd"> and R is the simplex subset containing dimensions lower than min_val</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">min_val</span> <span class="o">>=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">ndim</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'no sample can be draw from the </span><span class="si">{</span><span class="n">ndim</span><span class="si">}</span><span class="s1">-dimensional simplex so that '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'all its values are >=</span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> (try with a larger value for min_pos)'</span><span class="p">)</span>
|
|
||||||
<span class="n">trials</span> <span class="o">=</span> <span class="mi">0</span>
|
|
||||||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
|
||||||
<span class="n">u</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">ndim</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">all</span><span class="p">(</span><span class="n">u</span> <span class="o">>=</span> <span class="n">min_val</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">u</span>
|
|
||||||
<span class="n">trials</span> <span class="o">+=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">if</span> <span class="n">trials</span> <span class="o">>=</span> <span class="n">max_trials</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'it looks like finding a random simplex with all its dimensions being'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'>= </span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> is unlikely (it failed after </span><span class="si">{</span><span class="n">max_trials</span><span class="si">}</span><span class="s1"> trials)'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">optim</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">f1e</span><span class="p">:</span>
|
|
||||||
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">f1_score</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">acce</span><span class="p">:</span>
|
|
||||||
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">accuracy_score</span><span class="p">)</span>
|
|
||||||
<span class="n">classifier</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">scoring</span><span class="o">=</span><span class="n">scoring</span><span class="p">)</span>
|
|
||||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">),</span>
|
|
||||||
<span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span>
|
|
||||||
<span class="o">**</span><span class="n">param_model_sel</span><span class="p">,</span>
|
|
||||||
<span class="n">error</span><span class="o">=</span><span class="n">optim</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_check_error</span><span class="p">(</span><span class="n">error</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span> <span class="ow">or</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">error</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ERROR_NAMES</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ensembleFactory"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ensembleFactory">[docs]</a><span class="k">def</span> <span class="nf">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model</span>
|
|
||||||
<span class="sd"> selection for quantification) for a given evaluation metric using :class:`quapy.model_selection.GridSearchQ`.</span>
|
|
||||||
<span class="sd"> If the evaluation metric is classification-oriented</span>
|
|
||||||
<span class="sd"> (instead of quantification-oriented), then the optimization will be carried out via sklearn's</span>
|
|
||||||
<span class="sd"> `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Example to instantiate an :class:`Ensemble` based on :class:`quapy.method.aggregative.PACC`</span>
|
|
||||||
<span class="sd"> in which the base members are optimized for :meth:`quapy.error.mae` via</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`. The ensemble follows the policy `Accuracy` based</span>
|
|
||||||
<span class="sd"> on :meth:`quapy.error.mae` (the same measure being optimized),</span>
|
|
||||||
<span class="sd"> meaning that a static selection of members of the ensemble is made based on their performance</span>
|
|
||||||
<span class="sd"> in terms of this error.</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> param_grid = {</span>
|
|
||||||
<span class="sd"> >>> 'C': np.logspace(-3,3,7),</span>
|
|
||||||
<span class="sd"> >>> 'class_weight': ['balanced', None]</span>
|
|
||||||
<span class="sd"> >>> }</span>
|
|
||||||
<span class="sd"> >>> param_mod_sel = {</span>
|
|
||||||
<span class="sd"> >>> 'sample_size': 500,</span>
|
|
||||||
<span class="sd"> >>> 'protocol': 'app'</span>
|
|
||||||
<span class="sd"> >>> }</span>
|
|
||||||
<span class="sd"> >>> common={</span>
|
|
||||||
<span class="sd"> >>> 'max_sample_size': 1000,</span>
|
|
||||||
<span class="sd"> >>> 'n_jobs': -1,</span>
|
|
||||||
<span class="sd"> >>> 'param_grid': param_grid,</span>
|
|
||||||
<span class="sd"> >>> 'param_model_sel': param_mod_sel,</span>
|
|
||||||
<span class="sd"> >>> }</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> ensembleFactory(LogisticRegression(), PACC, optim='mae', policy='mae', **common)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param base_quantifier_class: a class of quantifiers</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">param_grid</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param_grid is None but optim was requested.'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">param_model_sel</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param_model_sel is None but optim was requested.'</span><span class="p">)</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">_check_error</span><span class="p">(</span><span class="n">optim</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">error</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ECC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ECC">[docs]</a><span class="k">def</span> <span class="nf">ECC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.CC` quantifiers, as used by</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Equivalent to:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
|
||||||
|
|
||||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyword argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">CC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EACC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EACC">[docs]</a><span class="k">def</span> <span class="nf">EACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.ACC` quantifiers, as used by</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Equivalent to:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
|
||||||
|
|
||||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyword argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EPACC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EPACC">[docs]</a><span class="k">def</span> <span class="nf">EPACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.PACC` quantifiers.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Equivalent to:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
|
||||||
|
|
||||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyword argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EHDy"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EHDy">[docs]</a><span class="k">def</span> <span class="nf">EHDy</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.HDy` quantifiers, as used by</span>
|
|
||||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Equivalent to:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)</span>
|
|
||||||
|
|
||||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyword argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EEMQ"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EEMQ">[docs]</a><span class="k">def</span> <span class="nf">EEMQ</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.EMQ` quantifiers.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Equivalent to:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)</span>
|
|
||||||
|
|
||||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
|
||||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
|
||||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyword argument to pass to</span>
|
|
||||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
|
||||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,266 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.method.non_aggregative — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../../_static/jquery.js"></script>
|
|
||||||
<script src="../../../_static/underscore.js"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../../_static/doctools.js"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.method.non_aggregative</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.method.non_aggregative</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">get_divergence</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation">[docs]</a><span class="k">class</span> <span class="nc">MaximumLikelihoodPrevalenceEstimation</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior</span>
|
|
||||||
<span class="sd"> probability shift between training and test instances (put another way, that the i.i.d. assumption holds).</span>
|
|
||||||
<span class="sd"> The estimation of class prevalence values for any test sample is always (i.e., irrespective of the test sample</span>
|
|
||||||
<span class="sd"> itself) the class prevalence seen during training. This method is considered to be a lower-bound quantifier that</span>
|
|
||||||
<span class="sd"> any quantification method should beat.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Computes the training prevalence and stores it.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: the training sample</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Ignores the input instances and returns, as the class prevalence estimates, the training prevalence.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: array-like (ignored)</span>
|
|
||||||
<span class="sd"> :return: the class prevalence seen during training</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DMx"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx">[docs]</a><span class="k">class</span> <span class="nc">DMx</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates.</span>
|
|
||||||
<span class="sd"> This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param nbins: number of bins used to discretize the distributions (default 8)</span>
|
|
||||||
<span class="sd"> :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented)</span>
|
|
||||||
<span class="sd"> or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger</span>
|
|
||||||
<span class="sd"> Distance)</span>
|
|
||||||
<span class="sd"> :param cdf: whether to use CDF instead of PDF (default False)</span>
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers (default None)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">'optim_minimize'</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">nbins</span> <span class="o">=</span> <span class="n">nbins</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">cdf</span> <span class="o">=</span> <span class="n">cdf</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">search</span> <span class="o">=</span> <span class="n">search</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DMx.HDx"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.HDx">[docs]</a> <span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">HDx</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> `Hellinger Distance x <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDx).</span>
|
|
||||||
<span class="sd"> HDx is a method for training binary quantifiers, that models quantification as the problem of</span>
|
|
||||||
<span class="sd"> minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized</span>
|
|
||||||
<span class="sd"> histograms of two representations, one for the unlabelled examples, and another generated from the training</span>
|
|
||||||
<span class="sd"> examples as a mixture model of the class-specific representations. The parameters of the mixture thus represent</span>
|
|
||||||
<span class="sd"> the estimates of the class prevalence values.</span>
|
|
||||||
|
|
||||||
<span class="sd"> The method computes all matchings for nbins in [10, 20, ..., 110] and reports the mean of the median.</span>
|
|
||||||
<span class="sd"> The best prevalence is searched via linear search, from 0 to 1 stepping by 0.01.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
|
||||||
<span class="sd"> :return: an instance of this class set up to mimic the performance of the HDx as originally proposed by</span>
|
|
||||||
<span class="sd"> González-Castro, Alaiz-Rodríguez, Alegre (2013)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
|
|
||||||
|
|
||||||
<span class="n">dmx</span> <span class="o">=</span> <span class="n">DMx</span><span class="p">(</span><span class="n">divergence</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">'linear_search'</span><span class="p">)</span>
|
|
||||||
<span class="n">nbins</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)}</span>
|
|
||||||
<span class="n">hdx</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">base_quantifier</span><span class="o">=</span><span class="n">dmx</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">nbins</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">hdx</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__get_distributions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">histograms</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">feat_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">):</span>
|
|
||||||
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">feat_idx</span><span class="p">]</span>
|
|
||||||
<span class="n">feat_range</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span><span class="p">[</span><span class="n">feat_idx</span><span class="p">]</span>
|
|
||||||
<span class="n">hist</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">feature</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nbins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="n">feat_range</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="n">norm_hist</span> <span class="o">=</span> <span class="n">hist</span> <span class="o">/</span> <span class="n">hist</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="n">histograms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">norm_hist</span><span class="p">)</span>
|
|
||||||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">histograms</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdf</span><span class="p">:</span>
|
|
||||||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">distributions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">distributions</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DMx.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates the validation distributions out of the training data (covariates).</span>
|
|
||||||
<span class="sd"> The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats`</span>
|
|
||||||
<span class="sd"> the number of features, and `nbins` the number of bins.</span>
|
|
||||||
<span class="sd"> In particular, let `V` be the validation distributions; then `di=V[i]` are the distributions obtained from</span>
|
|
||||||
<span class="sd"> training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in</span>
|
|
||||||
<span class="sd"> training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: the training set</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span> <span class="o">=</span> <span class="n">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
|
||||||
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">cat</span><span class="p">])</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)]</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DMx.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution</span>
|
|
||||||
<span class="sd"> (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.</span>
|
|
||||||
<span class="sd"> The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice)</span>
|
|
||||||
<span class="sd"> between all feature-specific discrete distributions.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: instances in the sample</span>
|
|
||||||
<span class="sd"> :return: a vector of class prevalence estimates</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape; expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
|
|
||||||
<span class="n">test_distribution</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
|
|
||||||
<span class="n">n_classes</span><span class="p">,</span> <span class="n">n_feats</span><span class="p">,</span> <span class="n">nbins</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">shape</span>
|
|
||||||
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
|
||||||
<span class="n">prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">expand_dims</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">mixture_distribution</span> <span class="o">=</span> <span class="p">(</span><span class="n">prev</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_feats</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">divs</span> <span class="o">=</span> <span class="p">[</span><span class="n">divergence</span><span class="p">(</span><span class="n">test_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">],</span> <span class="n">mixture_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">])</span> <span class="k">for</span> <span class="n">feat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_feats</span><span class="p">)]</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">divs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">search</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="n">feat_ranges</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">ncols</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="k">for</span> <span class="n">col_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">ncols</span><span class="p">):</span>
|
|
||||||
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span><span class="n">col_idx</span><span class="p">]</span>
|
|
||||||
<span class="n">feat_ranges</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">feature</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">feature</span><span class="p">)))</span>
|
|
||||||
<span class="k">return</span> <span class="n">feat_ranges</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="c1">#---------------------------------------------------------------</span>
|
|
||||||
<span class="c1"># aliases</span>
|
|
||||||
<span class="c1">#---------------------------------------------------------------</span>
|
|
||||||
|
|
||||||
<span class="n">DistributionMatchingX</span> <span class="o">=</span> <span class="n">DMx</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,516 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.model_selection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.model_selection</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.model_selection</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">signal</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">enum</span> <span class="kn">import</span> <span class="n">Enum</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">wraps</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn</span> <span class="kn">import</span> <span class="n">clone</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">evaluation</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">timeout</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="Status"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.Status">[docs]</a><span class="k">class</span> <span class="nc">Status</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
|
|
||||||
<span class="n">SUCCESS</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="n">TIMEOUT</span> <span class="o">=</span> <span class="mi">2</span>
|
|
||||||
<span class="n">INVALID</span> <span class="o">=</span> <span class="mi">3</span>
|
|
||||||
<span class="n">ERROR</span> <span class="o">=</span> <span class="mi">4</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ConfigStatus"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus">[docs]</a><span class="k">class</span> <span class="nc">ConfigStatus</span><span class="p">:</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">msg</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="n">status</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">msg</span> <span class="o">=</span> <span class="n">msg</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="sa">f</span><span class="s1">':params:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="si">}</span><span class="s1"> :status:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="si">}</span><span class="s1"> '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ConfigStatus.success"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.success">[docs]</a> <span class="k">def</span> <span class="nf">success</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">==</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ConfigStatus.failed"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.failed">[docs]</a> <span class="k">def</span> <span class="nf">failed</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">!=</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ">[docs]</a><span class="k">class</span> <span class="nc">GridSearchQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Grid Search optimization targeting a quantification-oriented metric.</span>
|
|
||||||
|
|
||||||
<span class="sd"> Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation</span>
|
|
||||||
<span class="sd"> protocol for quantification.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param model: the quantifier to optimize</span>
|
|
||||||
<span class="sd"> :type model: BaseQuantifier</span>
|
|
||||||
<span class="sd"> :param param_grid: a dictionary with keys the parameter names and values the list of values to explore</span>
|
|
||||||
<span class="sd"> :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol`</span>
|
|
||||||
<span class="sd"> :param error: an error function (callable) or a string indicating the name of an error function (valid ones</span>
|
|
||||||
<span class="sd"> are those in :class:`quapy.error.QUANTIFICATION_ERROR`)</span>
|
|
||||||
<span class="sd"> :param refit: whether to refit the model on the whole labelled collection (training+validation) with</span>
|
|
||||||
<span class="sd"> the best chosen hyperparameter combination. Ignored if protocol='gen'</span>
|
|
||||||
<span class="sd"> :param timeout: establishes a timer (in seconds) for each of the hyperparameter configurations being tested.</span>
|
|
||||||
<span class="sd"> Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up</span>
|
|
||||||
<span class="sd"> being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set.</span>
|
|
||||||
<span class="sd"> :param raise_errors: boolean, if True then raises an exception when a param combination yields any error;</span>
|
|
||||||
<span class="sd"> otherwise, if False (default), the combination is marked with an error status, but the process goes on.</span>
|
|
||||||
<span class="sd"> However, if no configuration yields a valid model, then a ValueError exception will be raised.</span>
|
|
||||||
<span class="sd"> :param verbose: set to True to get information through the stdout</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
|
||||||
<span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span>
|
|
||||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
|
||||||
<span class="n">error</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Callable</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">,</span>
|
|
||||||
<span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">raise_errors</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
|
||||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">model</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">protocol</span> <span class="o">=</span> <span class="n">protocol</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="o">=</span> <span class="n">refit</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span> <span class="o">=</span> <span class="n">raise_errors</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">__check_error</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">AbstractProtocol</span><span class="p">),</span> <span class="s1">'unknown protocol'</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">]: </span><span class="si">{</span><span class="n">msg</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">__check_error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
|
||||||
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="s1">'__call__'</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">'</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_prepare_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">cls_params</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
|
|
||||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">predictions</span>
|
|
||||||
|
|
||||||
<span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[classifier fit] hyperparams=</span><span class="si">{</span><span class="n">cls_params</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_prepare_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">cls_took</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">,</span> <span class="n">q_params</span> <span class="o">=</span> <span class="n">args</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
|
||||||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">cls_params</span><span class="p">,</span> <span class="o">**</span><span class="n">q_params</span><span class="p">}</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">q_params</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">score</span>
|
|
||||||
|
|
||||||
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">q_params</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="p">(</span><span class="n">cls_took</span><span class="o">+</span><span class="n">aggr_took</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_prepare_nonaggr_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">score</span>
|
|
||||||
|
|
||||||
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_break_down_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Decides whether to break down the fit phase in two (classifier-fit followed by aggregation-fit).</span>
|
|
||||||
<span class="sd"> In order to do so, some conditions should be met: a) the quantifier is of type aggregative,</span>
|
|
||||||
<span class="sd"> b) the set of hyperparameters can be split into two disjoint non-empty groups.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: True if the conditions are met, False otherwise</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">q_configs</span><span class="p">)</span><span class="o">==</span><span class="mi">1</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="kc">False</span>
|
|
||||||
<span class="k">return</span> <span class="kc">True</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_compute_scores_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
|
|
||||||
<span class="c1"># break down the set of hyperparameters into two: classifier-specific, quantifier-specific</span>
|
|
||||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># train all classifiers and get the predictions</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
|
|
||||||
<span class="n">cls_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_classifier</span><span class="p">,</span>
|
|
||||||
<span class="n">cls_configs</span><span class="p">,</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># filter out classifier configurations that yielded any error</span>
|
|
||||||
<span class="n">success_outs</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">),</span> <span class="n">cls_config</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">cls_outs</span><span class="p">,</span> <span class="n">cls_configs</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
|
||||||
<span class="n">success_outs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">took</span><span class="p">,</span> <span class="n">cls_config</span><span class="p">))</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">success_outs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'No valid configuration found for the classifier!'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># explore the quantifier-specific hyperparameters for each valid training configuration</span>
|
|
||||||
<span class="n">aggr_configs</span> <span class="o">=</span> <span class="p">[(</span><span class="o">*</span><span class="n">out</span><span class="p">,</span> <span class="n">q_config</span><span class="p">)</span> <span class="k">for</span> <span class="n">out</span><span class="p">,</span> <span class="n">q_config</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">success_outs</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)]</span>
|
|
||||||
<span class="n">aggr_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_aggregation</span><span class="p">,</span>
|
|
||||||
<span class="n">aggr_configs</span><span class="p">,</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">aggr_outs</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_compute_scores_nonaggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
|
|
||||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
|
|
||||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_nonaggr_model</span><span class="p">,</span>
|
|
||||||
<span class="n">configs</span><span class="p">,</span>
|
|
||||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
|
||||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">scores</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_print_status</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'hyperparams=[</span><span class="si">{</span><span class="n">params</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1"> got </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> = </span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error=</span><span class="si">{</span><span class="n">status</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ.fit"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing</span>
|
|
||||||
<span class="sd"> the error metric.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param training: the training set on which to optimize the hyperparameters</span>
|
|
||||||
<span class="sd"> :return: self</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'"refit" was requested, but the protocol does not implement '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'the </span><span class="si">{</span><span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> interface'</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'starting model selection with n_jobs=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_break_down_fit</span><span class="p">():</span>
|
|
||||||
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_aggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_nonaggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span> <span class="o">=</span> <span class="p">{}</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="k">for</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="ow">in</span> <span class="n">results</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="n">score</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span> <span class="o">=</span> <span class="n">params</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span> <span class="o">=</span> <span class="n">model</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">score</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">status</span><span class="o">.</span><span class="n">status</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'no combination of hyperparameters seemed to work'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'optimization finished: best params </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span><span class="si">}</span><span class="s1"> (score=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1">) '</span>
|
|
||||||
<span class="sa">f</span><span class="s1">'[took </span><span class="si">{</span><span class="n">tend</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">no_errors</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">no_errors</span><span class="o">></span><span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'warning: </span><span class="si">{</span><span class="n">no_errors</span><span class="si">}</span><span class="s1"> errors found'</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">err</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="si">{</span><span class="nb">str</span><span class="p">(</span><span class="n">err</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'refitting on the whole development set'</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
|
|
||||||
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">refit_time_</span> <span class="o">=</span> <span class="n">tend</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="c1"># already checked</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the model cannot be refit on the whole dataset'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="bp">self</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ.quantify"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param instances: sample containing the instances</span>
|
|
||||||
<span class="sd"> :return: a ndarray of shape `(n_classes)` with class prevalence estimates according to the best model found</span>
|
|
||||||
<span class="sd"> by the model selection process.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'best_model_'</span><span class="p">),</span> <span class="s1">'quantify called before fit'</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ.set_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Sets the hyper-parameters to explore.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param parameters: a dictionary with keys the parameter names and values the list of values to explore</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">parameters</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ.get_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""Returns the dictionary of hyper-parameters to explore (`param_grid`)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param deep: Unused</span>
|
|
||||||
<span class="sd"> :return: the dictionary `param_grid`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="GridSearchQ.best_model"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.best_model">[docs]</a> <span class="k">def</span> <span class="nf">best_model</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination</span>
|
|
||||||
<span class="sd"> of hyper-parameters that minimized the error function.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a trained quantifier</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'best_model_'</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'best_model called before fit'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_error_handler</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Wraps one job so that, in addition to its output, it returns two extra values: the status, and the time of execution</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param func: the function to be called</span>
|
|
||||||
<span class="sd"> :param params: parameters of the function</span>
|
|
||||||
<span class="sd"> :return: `tuple(out, status, time)` where `out` is the function output,</span>
|
|
||||||
<span class="sd"> `status` is an enum value from `Status`, and `time` is the time it</span>
|
|
||||||
<span class="sd"> took to complete the call</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">output</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_handle</span><span class="p">(</span><span class="n">status</span><span class="p">,</span> <span class="n">exception</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="n">exception</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="k">with</span> <span class="n">timeout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">timeout</span><span class="p">):</span>
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="n">output</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="n">status</span> <span class="o">=</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">except</span> <span class="ne">TimeoutError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
||||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">TIMEOUT</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
||||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">INVALID</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
||||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">ERROR</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">took</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
|
||||||
<span class="k">return</span> <span class="n">output</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="cross_val_predict"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.cross_val_predict">[docs]</a><span class="k">def</span> <span class="nf">cross_val_predict</span><span class="p">(</span><span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Akin to `scikit-learn's cross_val_predict <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_predict.html>`_</span>
|
|
||||||
<span class="sd"> but for quantification.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param quantifier: a quantifier issuing class prevalence values</span>
|
|
||||||
<span class="sd"> :param data: a labelled collection</span>
|
|
||||||
<span class="sd"> :param nfolds: number of folds for k-fold cross validation generation</span>
|
|
||||||
<span class="sd"> :param random_state: random seed for reproducibility</span>
|
|
||||||
<span class="sd"> :return: a vector of class prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">total_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">for</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
<span class="n">fold_prev</span> <span class="o">=</span> <span class="n">quantifier</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">rel_size</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">test</span><span class="p">)</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
|
||||||
<span class="n">total_prev</span> <span class="o">+=</span> <span class="n">fold_prev</span><span class="o">*</span><span class="n">rel_size</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">total_prev</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="expand_grid"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.expand_grid">[docs]</a><span class="k">def</span> <span class="nf">expand_grid</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Expands a param_grid dictionary as a list of configurations.</span>
|
|
||||||
<span class="sd"> Example:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]})</span>
|
|
||||||
<span class="sd"> >>> print(combinations)</span>
|
|
||||||
<span class="sd"> >>> [{'A': 1, 'B': True}, {'A': 1, 'B': False}, {'A': 10, 'B': True}, {'A': 10, 'B': False}, {'A': 100, 'B': True}, {'A': 100, 'B': False}]</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
|
|
||||||
<span class="sd"> to explore for that hyper-parameter</span>
|
|
||||||
<span class="sd"> :return: a list of configurations, i.e., combinations of hyper-parameter assignments in the grid.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">params_keys</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
|
||||||
<span class="n">params_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
|
||||||
<span class="n">configs</span> <span class="o">=</span> <span class="p">[{</span><span class="n">k</span><span class="p">:</span> <span class="n">combs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">params_keys</span><span class="p">)}</span> <span class="k">for</span> <span class="n">combs</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">params_values</span><span class="p">)]</span>
|
|
||||||
<span class="k">return</span> <span class="n">configs</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="group_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.group_params">[docs]</a><span class="k">def</span> <span class="nf">group_params</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific</span>
|
|
||||||
<span class="sd"> hyper-parameters, and another for the quantifier-specific hyper-parameters</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
|
|
||||||
<span class="sd"> to explore for that hyper-parameter</span>
|
|
||||||
<span class="sd"> :return: two expanded grids of configurations, one for the classifier, another for the quantifier</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">classifier_params</span><span class="p">,</span> <span class="n">quantifier_params</span> <span class="o">=</span> <span class="p">{},</span> <span class="p">{}</span>
|
|
||||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">values</span> <span class="ow">in</span> <span class="n">param_grid</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
|
||||||
<span class="k">if</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">key</span> <span class="o">==</span> <span class="s1">'val_split'</span><span class="p">:</span>
|
|
||||||
<span class="n">classifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">quantifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
|
|
||||||
|
|
||||||
<span class="n">classifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">classifier_params</span><span class="p">)</span>
|
|
||||||
<span class="n">quantifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">quantifier_params</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">classifier_configs</span><span class="p">,</span> <span class="n">quantifier_configs</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,687 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.plot — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.plot</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.plot</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">matplotlib.cm</span> <span class="kn">import</span> <span class="n">get_cmap</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.stats</span> <span class="kn">import</span> <span class="n">ttest_ind_from_stats</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">matplotlib.ticker</span> <span class="kn">import</span> <span class="n">ScalarFormatter</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">math</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'figure.figsize'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">6</span><span class="p">]</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'figure.dpi'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">200</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'font.size'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">18</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="binary_diagonal">
|
|
||||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_diagonal">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">binary_diagonal</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">train_prev</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence</span>
|
|
||||||
<span class="sd"> values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the</span>
|
|
||||||
<span class="sd"> name). It is convenient for binary quantification problems, though it can be used for multiclass problems by</span>
|
|
||||||
<span class="sd"> indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots</span>
|
|
||||||
<span class="sd"> like the :meth:`error_by_drift` might be preferable though).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
|
||||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
|
||||||
<span class="sd"> each experiment</span>
|
|
||||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
|
||||||
<span class="sd"> for each experiment</span>
|
|
||||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
|
||||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
|
||||||
<span class="sd"> :param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient</span>
|
|
||||||
<span class="sd"> for cases in which many methods are compared, or when the standard deviations are high (default True)</span>
|
|
||||||
<span class="sd"> :param legend: whether or not to display the legend (default True)</span>
|
|
||||||
<span class="sd"> :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is highlighted</span>
|
|
||||||
<span class="sd"> in the plot. This is convenient when all the experiments have been conducted in the same dataset.</span>
|
|
||||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
|
||||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
|
||||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="s1">'equal'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">'--k'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'ideal'</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">))</span>
|
|
||||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">table</span> <span class="o">=</span> <span class="p">{</span><span class="n">method_name</span><span class="p">:[</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">]</span> <span class="k">for</span> <span class="n">method_name</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">}</span>
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="p">[(</span><span class="n">method_name</span><span class="p">,</span> <span class="o">*</span><span class="n">table</span><span class="p">[</span><span class="n">method_name</span><span class="p">])</span> <span class="k">for</span> <span class="n">method_name</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">NUM_COLORS</span><span class="o">></span><span class="mi">10</span><span class="p">:</span>
|
|
||||||
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">'tab20'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">:</span>
|
|
||||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">x_ticks</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">true_prev</span><span class="p">)</span>
|
|
||||||
<span class="n">x_ticks</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
|
||||||
<span class="n">y_ave</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
|
|
||||||
<span class="n">y_std</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">std</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">-</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">+</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">train_prev</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">train_prev</span> <span class="o">=</span> <span class="n">train_prev</span><span class="p">[</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">scatter</span><span class="p">(</span><span class="n">train_prev</span><span class="p">,</span> <span class="n">train_prev</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'c'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'tr-prev'</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span> <span class="n">s</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">'true prevalence'</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">'estimated prevalence'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">'center left'</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
|
|
||||||
<span class="c1"># box = ax.get_position()</span>
|
|
||||||
<span class="c1"># ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])</span>
|
|
||||||
<span class="c1"># ax.legend(loc='lower center',</span>
|
|
||||||
<span class="c1"># bbox_to_anchor=(1, -0.5),</span>
|
|
||||||
<span class="c1"># ncol=(len(method_names)+1)//2)</span>
|
|
||||||
|
|
||||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="binary_bias_global">
|
|
||||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_global">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">binary_bias_global</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)</span>
|
|
||||||
<span class="sd"> for each quantification method with respect to a given positive class.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
|
||||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
|
||||||
<span class="sd"> each experiment</span>
|
|
||||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
|
||||||
<span class="sd"> for each experiment</span>
|
|
||||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
|
||||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
|
||||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
<span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="o">-</span><span class="n">true_prev</span><span class="p">)</span>
|
|
||||||
<span class="n">labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">boxplot</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">xticks</span><span class="p">(</span><span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">ylabel</span><span class="o">=</span><span class="s1">'error bias'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="binary_bias_bins">
|
|
||||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_bins">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">binary_bias_bins</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">colormap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">tab10</span><span class="p">,</span>
|
|
||||||
<span class="n">vertical_xticks</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)</span>
|
|
||||||
<span class="sd"> for different bins of (true) prevalence of the positive class, for each quantification method.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
|
||||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
|
||||||
<span class="sd"> each experiment</span>
|
|
||||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
|
||||||
<span class="sd"> for each experiment</span>
|
|
||||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
|
||||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
|
||||||
<span class="sd"> :param nbins: number of bins</span>
|
|
||||||
<span class="sd"> :param colormap: the matplotlib colormap to use (default cm.tab10)</span>
|
|
||||||
<span class="sd"> :param vertical_xticks: whether or not to add secondary grid (default is False)</span>
|
|
||||||
<span class="sd"> :param legend: whether or not to display the legend (default is True)</span>
|
|
||||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">pylab</span> <span class="kn">import</span> <span class="n">boxplot</span><span class="p">,</span> <span class="n">plot</span><span class="p">,</span> <span class="n">setp</span>
|
|
||||||
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nbins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">nbins</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
|
|
||||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">]</span> <span class="o">-</span> <span class="n">true_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="n">nmethods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
|
|
||||||
<span class="n">boxwidth</span> <span class="o">=</span> <span class="n">binwidth</span><span class="o">/</span><span class="p">(</span><span class="n">nmethods</span><span class="o">+</span><span class="mi">4</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="nb">bin</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
|
|
||||||
<span class="n">boxdata</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_names</span><span class="p">]</span>
|
|
||||||
<span class="n">positions</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bin</span><span class="o">+</span><span class="p">(</span><span class="n">i</span><span class="o">*</span><span class="n">boxwidth</span><span class="p">)</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">boxwidth</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_names</span><span class="p">)]</span>
|
|
||||||
<span class="n">box</span> <span class="o">=</span> <span class="n">boxplot</span><span class="p">(</span><span class="n">boxdata</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">positions</span><span class="o">=</span><span class="n">positions</span><span class="p">,</span> <span class="n">widths</span> <span class="o">=</span> <span class="n">boxwidth</span><span class="p">,</span> <span class="n">sym</span><span class="o">=</span><span class="s1">'+'</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">boxid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
|
|
||||||
<span class="n">c</span> <span class="o">=</span> <span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">boxid</span><span class="o">%</span><span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
|
|
||||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'fliers'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'+'</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mf">3.</span><span class="p">,</span> <span class="n">markeredgecolor</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
|
|
||||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'boxes'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
|
|
||||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'medians'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">major_xticks_positions</span><span class="p">,</span> <span class="n">minor_xticks_positions</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="n">major_xticks_labels</span><span class="p">,</span> <span class="n">minor_xticks_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
|
|
||||||
<span class="n">major_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
|
|
||||||
<span class="n">minor_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span> <span class="o">+</span> <span class="n">binwidth</span> <span class="o">/</span> <span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">major_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">''</span><span class="p">)</span>
|
|
||||||
<span class="n">minor_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">-</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">major_xticks_positions</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">minor_xticks_positions</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">major_xticks_labels</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">minor_xticks_labels</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="s1">'vertical'</span> <span class="k">if</span> <span class="n">vertical_xticks</span> <span class="k">else</span> <span class="s1">'horizontal'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">vertical_xticks</span><span class="p">:</span>
|
|
||||||
<span class="c1"># Pad margins so that markers don't get clipped by the axes</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">margins</span><span class="p">(</span><span class="mf">0.2</span><span class="p">)</span>
|
|
||||||
<span class="c1"># Tweak spacing to prevent clipping of tick-labels</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">bottom</span><span class="o">=</span><span class="mf">0.15</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
|
|
||||||
<span class="c1"># adds the legend to the list hs, initialized with the "ideal" quantifier (one that has 0 bias across all bins. i.e.</span>
|
|
||||||
<span class="c1"># a line from (0,0) to (1,0). The other elements are simply labelled dot-plots that are to be removed (setting</span>
|
|
||||||
<span class="c1"># set_visible to False for all but the first element) after the legend has been placed</span>
|
|
||||||
<span class="n">hs</span><span class="o">=</span><span class="p">[</span><span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">'-k'</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)[</span><span class="mi">0</span><span class="p">]]</span>
|
|
||||||
<span class="k">for</span> <span class="n">colorid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
|
|
||||||
<span class="n">color</span><span class="o">=</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">colorid</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
|
|
||||||
<span class="n">h</span><span class="p">,</span> <span class="o">=</span> <span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">'-s'</span><span class="p">,</span> <span class="n">markerfacecolor</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span><span class="n">mec</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.</span><span class="p">)</span>
|
|
||||||
<span class="n">hs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
|
|
||||||
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">hs</span><span class="p">,</span> <span class="p">[</span><span class="s1">'ideal'</span><span class="p">]</span><span class="o">+</span><span class="n">method_names</span><span class="p">,</span> <span class="n">loc</span><span class="o">=</span><span class="s1">'center left'</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
|
|
||||||
<span class="p">[</span><span class="n">h</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">h</span> <span class="ow">in</span> <span class="n">hs</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]</span>
|
|
||||||
|
|
||||||
<span class="c1"># x-axis and y-axis labels and limits</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">'prevalence'</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">'error bias'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="error_by_drift">
|
|
||||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.error_by_drift">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">error_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
|
|
||||||
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">error_name</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
|
||||||
<span class="n">show_density</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">show_legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
|
||||||
<span class="n">logscale</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
|
||||||
<span class="n">title</span><span class="o">=</span><span class="sa">f</span><span class="s1">'Quantification error as a function of distribution shift'</span><span class="p">,</span>
|
|
||||||
<span class="n">vlines</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Plots the error (along the y-axis, as measured in terms of `error_name`) as a function of the train-test shift</span>
|
|
||||||
<span class="sd"> (along the x-axis, as measured in terms of :meth:`quapy.error.ae`). This plot is useful especially for multiclass</span>
|
|
||||||
<span class="sd"> problems, in which "diagonal plots" may be cumbersome, and in order to gain understanding about how methods</span>
|
|
||||||
<span class="sd"> fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the</span>
|
|
||||||
<span class="sd"> high-shift regime).</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
|
||||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
|
||||||
<span class="sd"> each experiment</span>
|
|
||||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
|
||||||
<span class="sd"> for each experiment</span>
|
|
||||||
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
|
|
||||||
<span class="sd"> :param n_bins: number of bins in which the x-axis is to be divided (default is 20)</span>
|
|
||||||
<span class="sd"> :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")</span>
|
|
||||||
<span class="sd"> :param show_std: whether or not to show standard deviations as color bands (default is False)</span>
|
|
||||||
<span class="sd"> :param show_density: whether or not to display the distribution of experiments for each bin (default is True)</span>
|
|
||||||
<span class="sd"> :param show_legend: whether or not to display the legend of the chart (default is True)</span>
|
|
||||||
<span class="sd"> :param logscale: whether or not to log-scale the y-error measure (default is False)</span>
|
|
||||||
<span class="sd"> :param title: title of the plot (default is "Quantification error as a function of distribution shift")</span>
|
|
||||||
<span class="sd"> :param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space</span>
|
|
||||||
<span class="sd"> using vertical dotted lines.</span>
|
|
||||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
|
||||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
|
||||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">x_error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ae</span>
|
|
||||||
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">error_name</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same</span>
|
|
||||||
<span class="c1"># order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to</span>
|
|
||||||
<span class="c1"># x_error function) and 'y' is the estim-test shift (computed as according to y_error)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
|
|
||||||
|
|
||||||
<span class="n">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">n_bins</span>
|
|
||||||
<span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">,</span> <span class="n">min_y</span><span class="p">,</span> <span class="n">max_y</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
|
||||||
<span class="n">npoints</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
|
||||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span>
|
|
||||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span>
|
|
||||||
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yscale</span><span class="p">(</span><span class="s2">"log"</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">set_major_formatter</span><span class="p">(</span><span class="n">ScalarFormatter</span><span class="p">())</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">get_major_formatter</span><span class="p">()</span><span class="o">.</span><span class="n">set_scientific</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">minorticks_off</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">ystds</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">p</span><span class="p">,</span><span class="n">ind</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))):</span>
|
|
||||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
|
|
||||||
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">xs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ind</span><span class="o">*</span><span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">ys</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
|
|
||||||
<span class="n">ystds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
|
|
||||||
<span class="n">npoints</span><span class="p">[</span><span class="n">p</span><span class="p">]</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="n">xs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">xs</span><span class="p">)</span>
|
|
||||||
<span class="n">ys</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ys</span><span class="p">)</span>
|
|
||||||
<span class="n">ystds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ystds</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">min_x_method</span><span class="p">,</span> <span class="n">max_x_method</span><span class="p">,</span> <span class="n">min_y_method</span><span class="p">,</span> <span class="n">max_y_method</span> <span class="o">=</span> <span class="n">xs</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">xs</span><span class="o">.</span><span class="n">max</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
|
||||||
<span class="n">min_x</span> <span class="o">=</span> <span class="n">min_x_method</span> <span class="k">if</span> <span class="n">min_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_x_method</span> <span class="o"><</span> <span class="n">min_x</span> <span class="k">else</span> <span class="n">min_x</span>
|
|
||||||
<span class="n">max_x</span> <span class="o">=</span> <span class="n">max_x_method</span> <span class="k">if</span> <span class="n">max_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_x_method</span> <span class="o">></span> <span class="n">max_x</span> <span class="k">else</span> <span class="n">max_x</span>
|
|
||||||
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">></span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
|
|
||||||
<span class="n">min_y</span> <span class="o">=</span> <span class="n">min_y_method</span> <span class="k">if</span> <span class="n">min_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_y_method</span> <span class="o"><</span> <span class="n">min_y</span> <span class="k">else</span> <span class="n">min_y</span>
|
|
||||||
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">></span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'w'</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">6</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="o">-</span><span class="n">ystds</span><span class="p">,</span> <span class="n">ys</span><span class="o">+</span><span class="n">ystds</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">show_density</span><span class="p">:</span>
|
|
||||||
<span class="n">ax2</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">twinx</span><span class="p">()</span>
|
|
||||||
<span class="n">densities</span> <span class="o">=</span> <span class="n">npoints</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">npoints</span><span class="p">)</span>
|
|
||||||
<span class="n">ax2</span><span class="o">.</span><span class="n">bar</span><span class="p">([</span><span class="n">ind</span> <span class="o">*</span> <span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))],</span>
|
|
||||||
<span class="n">densities</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.15</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'g'</span><span class="p">,</span> <span class="n">width</span><span class="o">=</span><span class="n">binwidth</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'density'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax2</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="nb">max</span><span class="p">(</span><span class="n">densities</span><span class="p">))</span>
|
|
||||||
<span class="n">ax2</span><span class="o">.</span><span class="n">spines</span><span class="p">[</span><span class="s1">'right'</span><span class="p">]</span><span class="o">.</span><span class="n">set_color</span><span class="p">(</span><span class="s1">'g'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax2</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'y'</span><span class="p">,</span> <span class="n">colors</span><span class="o">=</span><span class="s1">'g'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">'Distribution shift between training set and test sample'</span><span class="p">,</span>
|
|
||||||
<span class="n">ylabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">error_name</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span><span class="si">}</span><span class="s1"> (true distribution, predicted distribution)'</span><span class="p">,</span>
|
|
||||||
<span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
|
||||||
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="n">vlines</span><span class="p">:</span>
|
|
||||||
<span class="k">for</span> <span class="n">vline</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">vline</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">'--'</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
|
|
||||||
<span class="c1">#nice scale for the logarithmic axis</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">10</span> <span class="o">**</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">log10</span><span class="p">(</span><span class="n">max_y</span><span class="p">)))</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">show_legend</span><span class="p">:</span>
|
|
||||||
<span class="n">fig</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">'lower center'</span><span class="p">,</span>
|
|
||||||
<span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span>
|
|
||||||
<span class="n">ncol</span><span class="o">=</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="brokenbar_supremacy_by_drift">
|
|
||||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.brokenbar_supremacy_by_drift">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">brokenbar_supremacy_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
|
|
||||||
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">binning</span><span class="o">=</span><span class="s1">'isomerous'</span><span class="p">,</span>
|
|
||||||
<span class="n">x_error</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">y_error</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">ttest_alpha</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span> <span class="n">tail_density_threshold</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span>
|
|
||||||
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Displays (only) the top performing methods for different regions of the train-test shift in form of a broken</span>
|
|
||||||
<span class="sd"> bar chart, in which each method has bars only for those regions in which either one of the following conditions</span>
|
|
||||||
<span class="sd"> hold: (i) it is the best method (on average) for the bin, or (ii) it is not statistically significantly different from the best one</span>
|
|
||||||
<span class="sd"> (in average) as according to a two-sided t-test on independent samples at confidence `ttest_alpha`.</span>
|
|
||||||
<span class="sd"> The binning can be made "isometric" (same size), or "isomerous" (same number of experiments -- default). A second</span>
|
|
||||||
<span class="sd"> plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric") or</span>
|
|
||||||
<span class="sd"> the percentiles points of the distribution (when binning="isomerous").</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
|
||||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
|
||||||
<span class="sd"> each experiment</span>
|
|
||||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
|
||||||
<span class="sd"> for each experiment</span>
|
|
||||||
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
|
|
||||||
<span class="sd"> :param n_bins: number of bins in which the x-axis (i.e., the train-test shift) is to be divided (default is 20)</span>
|
|
||||||
<span class="sd"> :param binning: type of binning, either "isomerous" (default) or "isometric"</span>
|
|
||||||
<span class="sd"> :param x_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
|
|
||||||
<span class="sd"> measuring the amount of train-test shift (default is "ae")</span>
|
|
||||||
<span class="sd"> :param y_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
|
|
||||||
<span class="sd"> measuring the amount of error in the prevalence estimations (default is "ae")</span>
|
|
||||||
<span class="sd"> :param ttest_alpha: the significance level above which a p-value (two-sided t-test on independent samples) is</span>
|
|
||||||
<span class="sd"> to be considered as an indicator that the two means are not statistically significantly different. Default is</span>
|
|
||||||
<span class="sd"> 0.005, meaning that a `p-value > 0.005` indicates the two methods involved are to be considered similar</span>
|
|
||||||
<span class="sd"> :param tail_density_threshold: sets a threshold on the density of experiments (over the total number of experiments)</span>
|
|
||||||
<span class="sd"> below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some</span>
|
|
||||||
<span class="sd"> bins to be shown for train-test outliers.</span>
|
|
||||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
|
||||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
|
||||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
|
||||||
<span class="sd"> :return:</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">binning</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'isomerous'</span><span class="p">,</span> <span class="s1">'isometric'</span><span class="p">],</span> <span class="s1">'unknown binning type; valid types are "isomerous" and "isometric"'</span>
|
|
||||||
|
|
||||||
<span class="n">x_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">x_error</span><span class="p">)</span>
|
|
||||||
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same</span>
|
|
||||||
<span class="c1"># order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to</span>
|
|
||||||
<span class="c1"># x_error function) and 'y' is the estim-test shift (computed as according to y_error)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">'isomerous'</span><span class="p">:</span>
|
|
||||||
<span class="c1"># take bins containing the same amount of examples</span>
|
|
||||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">])</span>
|
|
||||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">q</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="c1"># take equidistant bins</span>
|
|
||||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mf">0.001</span>
|
|
||||||
<span class="n">bins</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span> <span class="mf">0.001</span>
|
|
||||||
|
|
||||||
<span class="c1"># we use this to keep track of how many datapoints contribute to each bin</span>
|
|
||||||
<span class="n">inds_histogram_global</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">n_bins</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
|
||||||
<span class="n">n_methods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">)</span>
|
|
||||||
<span class="n">buckets</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_methods</span><span class="p">,</span> <span class="n">n_bins</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">method</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_order</span><span class="p">):</span>
|
|
||||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span>
|
|
||||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">inds_histogram_global</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">density</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
|
|
||||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span> <span class="o">==</span> <span class="n">j</span>
|
|
||||||
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
|
||||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
|
||||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="c1"># cancel last buckets with low density</span>
|
|
||||||
<span class="n">histogram</span> <span class="o">=</span> <span class="n">inds_histogram_global</span> <span class="o">/</span> <span class="n">inds_histogram_global</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">tail</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">histogram</span><span class="p">))):</span>
|
|
||||||
<span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">tail</span><span class="p">]</span> <span class="o"><</span> <span class="n">tail_density_threshold</span><span class="p">:</span>
|
|
||||||
<span class="n">buckets</span><span class="p">[:,</span><span class="n">tail</span><span class="p">,</span><span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">break</span>
|
|
||||||
|
|
||||||
<span class="n">salient_methods</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
|
||||||
<span class="n">best_methods</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">bucket</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">buckets</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]):</span>
|
|
||||||
<span class="n">nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
|
||||||
<span class="k">if</span> <span class="n">nc</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">([])</span>
|
|
||||||
<span class="k">continue</span>
|
|
||||||
|
|
||||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span>
|
|
||||||
<span class="n">rank1</span> <span class="o">=</span> <span class="n">order</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="n">best_bucket_methods</span> <span class="o">=</span> <span class="p">[</span><span class="n">method_order</span><span class="p">[</span><span class="n">rank1</span><span class="p">]]</span>
|
|
||||||
<span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">rank1</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
|
|
||||||
<span class="k">for</span> <span class="n">method_index</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
|
|
||||||
<span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">method_index</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
|
|
||||||
<span class="n">_</span><span class="p">,</span> <span class="n">pval</span> <span class="o">=</span> <span class="n">ttest_ind_from_stats</span><span class="p">(</span><span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span><span class="p">,</span> <span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">pval</span> <span class="o">></span> <span class="n">ttest_alpha</span><span class="p">:</span>
|
|
||||||
<span class="n">best_bucket_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method_order</span><span class="p">[</span><span class="n">method_index</span><span class="p">])</span>
|
|
||||||
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
|
||||||
<span class="n">salient_methods</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">binning</span><span class="o">==</span><span class="s1">'isomerous'</span><span class="p">:</span>
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">'height_ratios'</span><span class="p">:</span> <span class="p">[</span><span class="mf">0.2</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">'height_ratios'</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="n">high_from</span> <span class="o">=</span> <span class="mi">0</span>
|
|
||||||
<span class="n">yticks</span><span class="p">,</span> <span class="n">yticks_method_names</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="n">color</span> <span class="o">=</span> <span class="n">get_cmap</span><span class="p">(</span><span class="s1">'Accent'</span><span class="p">)</span><span class="o">.</span><span class="n">colors</span>
|
|
||||||
<span class="n">vlines</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">bar_high</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="n">m</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span> <span class="k">if</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">salient_methods</span><span class="p">]:</span>
|
|
||||||
<span class="n">broken_paths</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">best_bucket_methods</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">best_methods</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">best_bucket_methods</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">path_start</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
|
||||||
<span class="n">path_end</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">path_start</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">path_end</span> <span class="o">+=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
|
|
||||||
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
|
||||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">(</span><span class="n">broken_paths</span><span class="p">,</span> <span class="p">(</span><span class="n">high_from</span><span class="p">,</span> <span class="n">bar_high</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="n">color</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)])</span>
|
|
||||||
<span class="n">yticks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">high_from</span><span class="o">+</span><span class="n">bar_high</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">high_from</span> <span class="o">+=</span> <span class="n">bar_high</span>
|
|
||||||
<span class="n">yticks_method_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="ow">in</span> <span class="n">broken_paths</span><span class="p">:</span>
|
|
||||||
<span class="n">vlines</span><span class="o">.</span><span class="n">extend</span><span class="p">([</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_start</span><span class="o">+</span><span class="n">path_end</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="n">vlines</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
|
|
||||||
<span class="n">vlines</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">v</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">'--'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">high_from</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlabel</span><span class="p">(</span><span class="s1">'Distribution shift between training set and sample'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticks</span><span class="p">(</span><span class="n">yticks</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticklabels</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># upper plot (explaining distribution)</span>
|
|
||||||
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">'isometric'</span><span class="p">:</span>
|
|
||||||
<span class="c1"># show the density for each region</span>
|
|
||||||
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">=</span><span class="mi">0</span>
|
|
||||||
<span class="n">y_pos</span> <span class="o">=</span> <span class="p">[</span><span class="n">b</span><span class="o">+</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">)</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">></span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="n">bar_width</span> <span class="o">=</span> <span class="p">[</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">></span><span class="mi">0</span><span class="p">]</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">y_pos</span><span class="p">,</span> <span class="p">[</span><span class="n">n</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">histogram</span> <span class="k">if</span> <span class="n">n</span><span class="o">></span><span class="mi">0</span><span class="p">],</span> <span class="n">bar_width</span><span class="p">,</span> <span class="n">align</span><span class="o">=</span><span class="s1">'center'</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'silver'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">'shift</span><span class="se">\n</span><span class="s1">distribution'</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'right'</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">'center'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="c1"># show the percentiles of the distribution</span>
|
|
||||||
<span class="n">cumsum</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">histogram</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])):</span>
|
|
||||||
<span class="n">start</span><span class="p">,</span> <span class="n">width</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">([</span><span class="nb">tuple</span><span class="p">((</span><span class="n">start</span><span class="p">,</span> <span class="n">width</span><span class="p">))],</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="s1">'whitesmoke'</span> <span class="k">if</span> <span class="n">i</span><span class="o">%</span><span class="mi">2</span><span class="o">==</span><span class="mi">0</span> <span class="k">else</span> <span class="s1">'silver'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)</span><span class="o">-</span><span class="mi">2</span><span class="p">:</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'$P_{'</span><span class="o">+</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">cumsum</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">*</span><span class="mi">100</span><span class="p">))</span><span class="si">}</span><span class="s1">'</span><span class="o">+</span><span class="s1">'}$'</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'center'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_yaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">ndims</span> <span class="o">=</span> <span class="n">true_prevs</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">'true'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">)),</span> <span class="s1">'estim'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">))})</span>
|
|
||||||
<span class="n">method_order</span><span class="o">=</span><span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'true'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'true'</span><span class="p">],</span> <span class="n">true_prev</span><span class="p">])</span>
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">],</span> <span class="n">estim_prev</span><span class="p">])</span>
|
|
||||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
|
||||||
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
|
||||||
<span class="n">true_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'true'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
|
||||||
<span class="n">estim_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
|
||||||
<span class="k">return</span> <span class="n">method_order</span><span class="p">,</span> <span class="n">true_prevs_</span><span class="p">,</span> <span class="n">estim_prevs_</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="p">):</span>
|
|
||||||
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="n">n_methods</span>
|
|
||||||
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">'tab20'</span><span class="p">)</span>
|
|
||||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">):</span>
|
|
||||||
<span class="c1"># if savepath is specified, then saves the plot in that path; otherwise the plot is shown</span>
|
|
||||||
<span class="k">if</span> <span class="n">savepath</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">create_parent_dir</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span>
|
|
||||||
<span class="c1"># plt.tight_layout()</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="n">savepath</span><span class="p">,</span> <span class="n">bbox_inches</span><span class="o">=</span><span class="s1">'tight'</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">'x'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span> <span class="s1">'y'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">))})</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">method_order</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
|
|
||||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">):</span>
|
|
||||||
<span class="n">tr_prev_i</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">tr_prev_i</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">),</span> <span class="n">repeats</span><span class="o">=</span><span class="n">test_prevs_i</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">x_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">],</span> <span class="n">tr_test_drifts</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">y_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">],</span> <span class="n">method_drifts</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
|
||||||
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">return</span> <span class="n">data</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,606 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.protocol — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.protocol</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.protocol</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">exists</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">glob</span> <span class="kn">import</span> <span class="n">glob</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol">[docs]</a><span class="k">class</span> <span class="nc">AbstractProtocol</span><span class="p">(</span><span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Abstract parent class for sample generation protocols.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implements the protocol. Yields one sample at a time along with its prevalence</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: yields a tuple `(sample, prev) at a time, where `sample` is a set of instances</span>
|
|
||||||
<span class="sd"> and in which `prev` is an `nd.array` with the class prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractProtocol.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Indicates the total number of samples that the protocol generates.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: The number of samples to generate if known, or `None` otherwise.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="kc">None</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IterateProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol">[docs]</a><span class="k">class</span> <span class="nc">IterateProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A very simple protocol which simply iterates over a list of previously generated samples</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param samples: a list of :class:`quapy.data.base.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">samples</span><span class="p">:</span> <span class="p">[</span><span class="n">LabelledCollection</span><span class="p">]):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="n">samples</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Yields one sample from the initial list at a time</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: yields a tuple `(sample, prev) at a time, where `sample` is a set of instances</span>
|
|
||||||
<span class="sd"> and in which `prev` is an `nd.array` with the class prevalence values</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
|
|
||||||
<span class="k">yield</span> <span class="n">sample</span><span class="o">.</span><span class="n">Xp</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="IterateProtocol.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of samples in this protocol</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: int</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol">[docs]</a><span class="k">class</span> <span class="nc">AbstractStochasticSeededProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,</span>
|
|
||||||
<span class="sd"> via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples.</span>
|
|
||||||
<span class="sd"> The protocol abstraction enforces</span>
|
|
||||||
<span class="sd"> the object to be instantiated using a seed, so that the sequence can be fully replicated.</span>
|
|
||||||
<span class="sd"> In order to make this functionality possible, the classes extending this abstraction need to</span>
|
|
||||||
<span class="sd"> implement only two functions, :meth:`samples_parameters` which generates all the parameters</span>
|
|
||||||
<span class="sd"> needed for extracting the samples, and :meth:`sample` that, given some parameters as input,</span>
|
|
||||||
<span class="sd"> deterministically generates a sample.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param random_state: the seed for allowing to replicate any sequence of samples. Default is 0, meaning that</span>
|
|
||||||
<span class="sd"> the sequence will be consistent every time the protocol is called.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">_random_state</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># means "not set"</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
|
|
||||||
<span class="nd">@property</span>
|
|
||||||
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span>
|
|
||||||
|
|
||||||
<span class="nd">@random_state</span><span class="o">.</span><span class="n">setter</span>
|
|
||||||
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters">[docs]</a> <span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This function has to return all the necessary parameters to replicate the samples</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a list of parameters, each of which serves to deterministically generate a sample</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.sample">[docs]</a> <span class="nd">@abstractmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Extract one sample determined by the given parameters</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param params: all the necessary parameters to generate a sample</span>
|
|
||||||
<span class="sd"> :return: one sample (the same sample has to be generated for the same parameters)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="o">...</span></div>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Yields one sample at a time. The type of object returned depends on the `collator` function. The</span>
|
|
||||||
<span class="sd"> default behaviour returns tuples of the form `(sample, prevalence)`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a tuple `(sample, prevalence)` if return_type='sample_prev', or an instance of</span>
|
|
||||||
<span class="sd"> :class:`qp.data.LabelledCollection` if return_type='labelled_collection'</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'The random seed has never been initialized. '</span>
|
|
||||||
<span class="s1">'Set it to None not to impose replicability.'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">))</span>
|
|
||||||
<span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">():</span>
|
|
||||||
<span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">collator</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.collator"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.collator">[docs]</a> <span class="k">def</span> <span class="nf">collator</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> The collator prepares the sample to accommodate the desired output format before returning the output.</span>
|
|
||||||
<span class="sd"> This collator simply returns the sample as it is. Classes inheriting from this abstract class can</span>
|
|
||||||
<span class="sd"> implement their custom collators.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param sample: the sample to be returned</span>
|
|
||||||
<span class="sd"> :param args: additional arguments</span>
|
|
||||||
<span class="sd"> :return: the sample adhering to a desired output format (in this case, the sample is returned as it is)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">sample</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol">[docs]</a><span class="k">class</span> <span class="nc">OnLabelledCollectionProtocol</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Protocols that generate samples from a :class:`qp.data.LabelledCollection` object.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">RETURN_TYPES</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'sample_prev'</span><span class="p">,</span> <span class="s1">'labelled_collection'</span><span class="p">,</span> <span class="s1">'index'</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_labelled_collection"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection">[docs]</a> <span class="k">def</span> <span class="nf">get_labelled_collection</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the labelled collection on which this protocol acts.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: an object of type :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.on_preclassified_instances"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances">[docs]</a> <span class="k">def</span> <span class="nf">on_preclassified_instances</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a copy of this protocol that acts on a modified version of the original</span>
|
|
||||||
<span class="sd"> :class:`qp.data.LabelledCollection` in which the original instances have been replaced</span>
|
|
||||||
<span class="sd"> with the outputs of a classifier for each instance. (This is convenient for speeding-up</span>
|
|
||||||
<span class="sd"> the evaluation procedures for many samples, by pre-classifying the instances in advance.)</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param pre_classifications: the predictions issued by a classifier, typically an array-like</span>
|
|
||||||
<span class="sd"> with shape `(n_instances,)` when the classifier is a hard one, or with shape</span>
|
|
||||||
<span class="sd"> `(n_instances, n_classes)` when the classifier is a probabilistic one.</span>
|
|
||||||
<span class="sd"> :param in_place: whether or not to apply the modification in-place or in a new copy (default).</span>
|
|
||||||
<span class="sd"> :return: a copy of this protocol</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'error: the pre-classified data has different shape '</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'(expected </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
|
||||||
<span class="k">if</span> <span class="n">in_place</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">pre_classifications</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">new</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">new</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_collator"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_collator">[docs]</a> <span class="nd">@classmethod</span>
|
|
||||||
<span class="k">def</span> <span class="nf">get_collator</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns a collator function, i.e., a function that prepares the yielded data</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param return_type: either 'sample_prev' (default) if the collator is requested to yield tuples of</span>
|
|
||||||
<span class="sd"> `(sample, prevalence)`, or 'labelled_collection' when it is requested to yield instances of</span>
|
|
||||||
<span class="sd"> :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> :return: the collator function (a callable function that takes as input an instance of</span>
|
|
||||||
<span class="sd"> :class:`qp.data.LabelledCollection`)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">assert</span> <span class="n">return_type</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'unknown return type passed as argument; valid ones are </span><span class="si">{</span><span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="si">}</span><span class="s1">'</span>
|
|
||||||
<span class="k">if</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">'sample_prev'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span><span class="o">.</span><span class="n">Xp</span>
|
|
||||||
<span class="k">elif</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">'labelled_collection'</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="APP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP">[docs]</a><span class="k">class</span> <span class="nc">APP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Implementation of the artificial prevalence protocol (APP).</span>
|
|
||||||
<span class="sd"> The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,</span>
|
|
||||||
<span class="sd"> [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of</span>
|
|
||||||
<span class="sd"> prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,</span>
|
|
||||||
<span class="sd"> [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid</span>
|
|
||||||
<span class="sd"> combination of prevalence values is indicated by `repeats`.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
|
||||||
<span class="sd"> :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from</span>
|
|
||||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
|
||||||
<span class="sd"> :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the</span>
|
|
||||||
<span class="sd"> grid (default is 21)</span>
|
|
||||||
<span class="sd"> :param repeats: number of copies for each valid prevalence vector (default is 10)</span>
|
|
||||||
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
|
|
||||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
|
||||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
|
||||||
<span class="sd"> :param sanity_check: int, raises an exception warning the user that the number of examples to be generated exceed</span>
|
|
||||||
<span class="sd"> this number; set to None for skipping this check</span>
|
|
||||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
|
||||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
|
||||||
<span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">APP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span> <span class="o">=</span> <span class="n">n_prevalences</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span> <span class="o">=</span> <span class="n">smooth_limits_epsilon</span>
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">((</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sanity_check</span><span class="o">></span><span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> <span class="n">sanity_check</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'param "sanity_check" must either be None or a positive integer'</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="n">n</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">n</span> <span class="o">></span> <span class="n">sanity_check</span><span class="p">:</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
|
|
||||||
<span class="sa">f</span><span class="s2">"Abort: the number of samples that will be generated by </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">n</span><span class="si">}</span><span class="s2">) "</span>
|
|
||||||
<span class="sa">f</span><span class="s2">"exceeds the maximum number of allowed samples (</span><span class="si">{</span><span class="n">sanity_check</span><span class="w"> </span><span class="si">= }</span><span class="s2">). Set 'sanity_check' to "</span>
|
|
||||||
<span class="sa">f</span><span class="s2">"None, or to a higher number, for bypassing this check."</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="APP.prevalence_grid"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.prevalence_grid">[docs]</a> <span class="k">def</span> <span class="nf">prevalence_grid</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates vectors of prevalence values from an exhaustive grid of prevalence values. The</span>
|
|
||||||
<span class="sd"> number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example,</span>
|
|
||||||
<span class="sd"> `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only</span>
|
|
||||||
<span class="sd"> valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each</span>
|
|
||||||
<span class="sd"> valid vector of prevalence values, `repeat` copies are returned. The vector of prevalence values can be</span>
|
|
||||||
<span class="sd"> implicit (by setting `return_constrained_dim=False`), meaning that the last dimension (which is constrained</span>
|
|
||||||
<span class="sd"> to 1 - sum of the rest) is not returned (note that, quite obviously, in this case the vector does not sum up to</span>
|
|
||||||
<span class="sd"> 1). Note that this method is deterministic, i.e., there is no random sampling anywhere.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a `np.ndarray` of shape `(n, dimensions)` if `return_constrained_dim=True` or of shape</span>
|
|
||||||
<span class="sd"> `(n, dimensions-1)` if `return_constrained_dim=False`, where `n` is the number of valid combinations found</span>
|
|
||||||
<span class="sd"> in the grid multiplied by `repeat`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">dimensions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
|
||||||
<span class="n">s</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_linspace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span><span class="p">)</span>
|
|
||||||
<span class="n">eps</span> <span class="o">=</span> <span class="p">(</span><span class="n">s</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="mi">2</span> <span class="c1"># handling floating rounding</span>
|
|
||||||
<span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">dimensions</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">s</span><span class="p">,</span> <span class="n">repeat</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="o"><</span> <span class="p">(</span><span class="mf">1.</span><span class="o">+</span><span class="n">eps</span><span class="p">))]</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">),</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">prevs</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="APP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return all the necessary parameters to replicate the samples according to the APP protocol.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a list of indexes that realize the APP sampling</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence_grid</span><span class="p">():</span>
|
|
||||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
|
|
||||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="APP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="APP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of samples that will be generated</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: int</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NPP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP">[docs]</a><span class="k">class</span> <span class="nc">NPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing</span>
|
|
||||||
<span class="sd"> samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
|
||||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
|
||||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
|
||||||
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
|
|
||||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
|
||||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
|
||||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
|
||||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
|
||||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">NPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NPP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return all the necessary parameters to replicate the samples according to the NPP protocol.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a list of indexes that realize the NPP sampling</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
|
||||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NPP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="NPP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of samples that will be generated (equal to "repeats")</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: int</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="UPP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP">[docs]</a><span class="k">class</span> <span class="nc">UPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values,</span>
|
|
||||||
<span class="sd"> relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with</span>
|
|
||||||
<span class="sd"> k the number of classes. This protocol covers the entire range of prevalence values in a</span>
|
|
||||||
<span class="sd"> statistical sense, i.e., unlike APP there is no guarantee that it is covered precisely</span>
|
|
||||||
<span class="sd"> equally for all classes, but it is preferred in cases in which the number of possible</span>
|
|
||||||
<span class="sd"> combinations of the grid values of APP makes this endeavour intractable.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
|
||||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
|
||||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
|
||||||
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
|
|
||||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
|
||||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
|
||||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
|
||||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
|
||||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">UPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="UPP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return all the necessary parameters to replicate the samples according to the UPP protocol.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a list of indexes that realize the UPP sampling</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
|
||||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
|
|
||||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="UPP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="UPP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of samples that will be generated (equal to "repeats")</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: int</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DomainMixer"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer">[docs]</a><span class="k">class</span> <span class="nc">DomainMixer</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param domainA: one domain, an object of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> :param domainB: another domain, an object of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
|
||||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
|
||||||
<span class="sd"> :param repeats: int, number of samples to draw for every mixture rate</span>
|
|
||||||
<span class="sd"> :param prevalence: the prevalence to preserve along the mixtures. If specified, should be an array containing</span>
|
|
||||||
<span class="sd"> one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence</span>
|
|
||||||
<span class="sd"> will be taken from the domain A (default).</span>
|
|
||||||
<span class="sd"> :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will</span>
|
|
||||||
<span class="sd"> generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself,</span>
|
|
||||||
<span class="sd"> i.e., the specific points to use</span>
|
|
||||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
|
||||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
|
|
||||||
<span class="bp">self</span><span class="p">,</span>
|
|
||||||
<span class="n">domainA</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
|
|
||||||
<span class="n">domainB</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
|
|
||||||
<span class="n">sample_size</span><span class="p">,</span>
|
|
||||||
<span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
|
||||||
<span class="n">prevalence</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
|
||||||
<span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span>
|
|
||||||
<span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
|
||||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">DomainMixer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">A</span> <span class="o">=</span> <span class="n">domainA</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">B</span> <span class="o">=</span> <span class="n">domainB</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
|
||||||
<span class="k">if</span> <span class="n">prevalence</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">domainA</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalence</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span> <span class="o">==</span> <span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'wrong shape for the vector prevalence (expected </span><span class="si">{</span><span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="si">}</span><span class="s1">)'</span>
|
|
||||||
<span class="k">assert</span> <span class="n">F</span><span class="o">.</span><span class="n">check_prevalence_vector</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">),</span> \
|
|
||||||
<span class="sa">f</span><span class="s1">'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)'</span>
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">mixture_points</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="o"><=</span><span class="mi">1</span><span class="p">)),</span> \
|
|
||||||
<span class="s1">'mixture_model datatype not understood (expected int or a sequence of real values in [0,1])'</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DomainMixer.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Return all the necessary parameters to replicate the samples according to this protocol.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a list of zipped indexes (from A and B) that realize the sampling</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">propA</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">:</span>
|
|
||||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
|
||||||
<span class="n">nA</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">*</span> <span class="n">propA</span><span class="p">))</span>
|
|
||||||
<span class="n">nB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="o">-</span><span class="n">nA</span>
|
|
||||||
<span class="n">sampleAidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nA</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
|
|
||||||
<span class="n">sampleBidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nB</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
|
|
||||||
<span class="n">indexesA</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleAidx</span><span class="p">)</span>
|
|
||||||
<span class="n">indexesB</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleBidx</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DomainMixer.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">indexes</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Realizes the sample given a pair of indexes of the instances from A and B.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param indexes: indexes of the instances to select from A and B</span>
|
|
||||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="n">indexes</span>
|
|
||||||
<span class="n">sampleA</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesA</span><span class="p">)</span>
|
|
||||||
<span class="n">sampleB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesB</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">sampleA</span><span class="o">+</span><span class="n">sampleB</span></div>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="DomainMixer.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Returns the number of samples that will be generated (equal to "repeats * mixture_points")</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: int</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">)</span></div></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="c1"># aliases</span>
|
|
||||||
|
|
||||||
<span class="n">ArtificialPrevalenceProtocol</span> <span class="o">=</span> <span class="n">APP</span>
|
|
||||||
<span class="n">NaturalPrevalenceProtocol</span> <span class="o">=</span> <span class="n">NPP</span>
|
|
||||||
<span class="n">UniformPrevalenceProtocol</span> <span class="o">=</span> <span class="n">UPP</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_base</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_base</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_import">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_base.test_import">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_import</span><span class="p">():</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">__version__</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,178 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_datasets</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_datasets</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data.datasets</span> <span class="kn">import</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
|
|
||||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span>\
|
|
||||||
<span class="n">fetch_reviews</span><span class="p">,</span> <span class="n">fetch_twitter</span><span class="p">,</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">,</span> <span class="n">fetch_lequa2022</span><span class="p">,</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_fetch_reviews">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_reviews">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_fetch_twitter">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_twitter">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
|
||||||
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">ve</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval'</span> <span class="ow">and</span> <span class="n">ve</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
|
|
||||||
<span class="s1">'dataset "semeval" can only be used for model selection.'</span><span class="p">):</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_fetch_UCIDataset">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIDataset">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_fetch_UCIDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
|
||||||
<span class="k">except</span> <span class="ne">FileNotFoundError</span> <span class="k">as</span> <span class="n">fnfe</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'pageblocks.5'</span> <span class="ow">and</span> <span class="n">fnfe</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">find</span><span class="p">(</span>
|
|
||||||
<span class="s1">'If this is the first time you attempt to load this dataset'</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_fetch_UCIMultiDataset">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIMultiDataset">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_fetch_UCIMultiDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_fetch_lequa2022">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_lequa2022">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">gen_val</span><span class="p">,</span> <span class="n">gen_test</span> <span class="o">=</span> <span class="n">fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">stats</span><span class="p">())</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Val:'</span><span class="p">,</span> <span class="n">gen_val</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test:'</span><span class="p">,</span> <span class="n">gen_test</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,195 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_evaluation</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_evaluation</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.error</span> <span class="kn">import</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">,</span> \
|
|
||||||
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">PCC</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EvalTestCase">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">EvalTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
<div class="viewcode-block" id="EvalTestCase.test_eval_speedup">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_eval_speedup">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_eval_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'hp'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
|
|
||||||
|
|
||||||
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
|
||||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">time</span>
|
|
||||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="s1">'force'</span><span class="p">)</span>
|
|
||||||
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'evaluation (with optimization) took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">class</span> <span class="nc">NonAggregativeEMQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span>
|
|
||||||
|
|
||||||
<span class="n">emq</span> <span class="o">=</span> <span class="n">NonAggregativeEMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">tend_no_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'evaluation (w/o optimization) took </span><span class="si">{</span><span class="n">tend_no_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_no_optim</span><span class="o">></span><span class="p">(</span><span class="n">tend_optim</span><span class="o">/</span><span class="mi">2</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EvalTestCase.test_evaluation_output">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_evaluation_output">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_evaluation_output</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'hp'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
|
|
||||||
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span><span class="o">=</span><span class="mi">100</span>
|
|
||||||
|
|
||||||
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PCC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">single_errors</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span><span class="p">)</span>
|
|
||||||
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'m'</span><span class="o">+</span><span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
|
|
||||||
<span class="n">single_errors</span> <span class="o">=</span> <span class="n">single_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
|
|
||||||
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="n">averaged_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">averaged_errors</span><span class="p">]</span>
|
|
||||||
<span class="k">for</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">averaged_error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">single_errors</span><span class="p">,</span> <span class="n">averaged_errors</span><span class="p">):</span>
|
|
||||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">averaged_error_metric</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">score</span><span class="p">,</span> <span class="nb">float</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">error_metric</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">scores</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">scores</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">score</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_hierarchy — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_hierarchy</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_hierarchy</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="HierarchyTestCase">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">HierarchyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="HierarchyTestCase.test_aggregative">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_aggregative">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="HierarchyTestCase.test_binary">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_binary">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">HDy</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="HierarchyTestCase.test_probabilistic">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_probabilistic">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
|
||||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,176 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_labelcollection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_labelcollection</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_labelcollection</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">csr_matrix</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelCollectionTestCase">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">LabelCollectionTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
<div class="viewcode-block" id="LabelCollectionTestCase.test_split">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_split">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_split</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">100</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="n">tr</span><span class="p">,</span> <span class="n">te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_random</span><span class="p">(</span><span class="mf">0.7</span><span class="p">)</span>
|
|
||||||
<span class="n">check_prev</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.7</span> <span class="o">+</span> <span class="n">te</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.3</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="p">),</span> <span class="mi">70</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">te</span><span class="p">),</span> <span class="mi">30</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">allclose</span><span class="p">(</span><span class="n">check_prev</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="o">+</span><span class="n">te</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="LabelCollectionTestCase.test_join">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_join">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_join</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">50</span><span class="p">)</span>
|
|
||||||
<span class="n">data1</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">200</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
|
|
||||||
<span class="n">data2</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
|
|
||||||
<span class="n">data3</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data1</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data2</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data3</span><span class="p">))</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">all</span><span class="p">(</span><span class="n">combined</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">)),</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">data4</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">,</span> <span class="n">data4</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">20</span><span class="p">)</span>
|
|
||||||
<span class="n">data5</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">data6</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">,</span> <span class="n">data6</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">data4</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data4</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
|
||||||
<span class="n">data5</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data5</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,357 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_methods</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_methods</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pytest</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">LinearSVC</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">method.aggregative</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BinaryQuantifier</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method</span> <span class="kn">import</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">Ensemble</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">DMy</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
|
|
||||||
|
|
||||||
<span class="c1"># datasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True), id='hcr'),</span>
|
|
||||||
<span class="c1"># pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]</span>
|
|
||||||
|
|
||||||
<span class="n">tinydatasets</span> <span class="o">=</span> <span class="p">[</span><span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'hcr'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">'tiny_hcr'</span><span class="p">),</span>
|
|
||||||
<span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">'ionosphere'</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">'tiny_ionosphere'</span><span class="p">)]</span>
|
|
||||||
|
|
||||||
<span class="n">learners</span> <span class="o">=</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">,</span> <span class="n">LinearSVC</span><span class="p">]</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_aggregative_methods">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_aggregative_methods">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'aggregative_method'</span><span class="p">,</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">)</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'learner'</span><span class="p">,</span> <span class="n">learners</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">aggregative_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">aggregative_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">model</span><span class="p">)</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_non_aggregative_methods">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_non_aggregative_methods">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'non_aggregative_method'</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_non_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">non_aggregative_method</span><span class="p">):</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">non_aggregative_method</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_ensemble_method">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_ensemble_method">[docs]</a>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'base_method'</span><span class="p">,</span> <span class="p">[</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ACC</span><span class="p">,</span> <span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">PACC</span><span class="p">])</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'learner'</span><span class="p">,</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">])</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
|
||||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'policy'</span><span class="p">,</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">)</span>
|
|
||||||
<span class="k">def</span> <span class="nf">test_ensemble_method</span><span class="p">(</span><span class="n">base_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">policy</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">20</span>
|
|
||||||
|
|
||||||
<span class="n">base_quantifier</span><span class="o">=</span><span class="n">base_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span> <span class="ow">and</span> <span class="n">policy</span><span class="o">==</span><span class="s1">'ds'</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary policy ds on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">quantifier</span><span class="o">=</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">policy</span><span class="o">=</span><span class="n">policy</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_quanet_method">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_quanet_method">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_quanet_method</span><span class="p">():</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.classification.neural</span>
|
|
||||||
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'skipping QuaNet test due to missing torch package'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
|
||||||
|
|
||||||
<span class="c1"># load the kindle dataset as text, and convert words to numerical indexes</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="mi">200</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">CNNnet</span>
|
|
||||||
<span class="n">cnn</span> <span class="o">=</span> <span class="n">CNNnet</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">NeuralClassifierTrainer</span>
|
|
||||||
<span class="n">learner</span> <span class="o">=</span> <span class="n">NeuralClassifierTrainer</span><span class="p">(</span><span class="n">cnn</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">QuaNet</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">QuaNet</span><span class="p">(</span><span class="n">learner</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_str_label_names">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_str_label_names">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_str_label_names</span><span class="p">():</span>
|
|
||||||
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">CC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="o">*</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span>
|
|
||||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">))</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">int_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">int_estim_prevalences</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
|
|
||||||
|
|
||||||
<span class="n">dataset_str</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
|
|
||||||
<span class="p">[</span><span class="s1">'one'</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">'zero'</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">]),</span>
|
|
||||||
<span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
|
|
||||||
<span class="p">[</span><span class="s1">'one'</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">'zero'</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">]))</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">'wrong indexation'</span>
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">str_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
|
||||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">str_estim_prevalences</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
|
|
||||||
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">str_estim_prevalences</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
|
|
||||||
<span class="n">str_estim_prevalences</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s1">'one'</span><span class="p">)])</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="c1"># helper</span>
|
|
||||||
<span class="k">def</span> <span class="nf">__fit_test</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
|
|
||||||
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
|
||||||
<span class="n">test_samples</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">test_samples</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">),</span> <span class="n">estim_prevs</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_median_meta">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_median_meta</span><span class="p">():</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This test compares the performance of the MedianQuantifier with respect to computing the median of the predictions</span>
|
|
||||||
<span class="sd"> of a differently parameterized quantifier. We use the DistributionMatching base quantifier and the median is</span>
|
|
||||||
<span class="sd"> computed across different values of nbins</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
|
||||||
|
|
||||||
<span class="c1"># grid of values</span>
|
|
||||||
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">11</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">'kindle'</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="n">errors</span> <span class="o">=</span> <span class="p">[]</span>
|
|
||||||
<span class="k">for</span> <span class="n">nbins</span> <span class="ow">in</span> <span class="n">nbins_grid</span><span class="p">:</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">nbins</span><span class="o">=</span><span class="n">nbins</span><span class="p">)</span>
|
|
||||||
<span class="n">mae</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
|
|
||||||
<span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1"> DistributionMatching(nbins=</span><span class="si">{</span><span class="n">nbins</span><span class="si">}</span><span class="s1">) got MAE </span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span>
|
|
||||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">errors</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">median_mae</span><span class="p">,</span> <span class="n">prev</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">),</span> <span class="n">prev</span><span class="p">)</span>
|
|
||||||
<span class="k">assert</span> <span class="n">median_mae</span> <span class="o"><</span> <span class="n">mae</span><span class="p">,</span> <span class="s1">'the median-based quantifier provided a higher error...'</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="test_median_meta_modsel">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta_modsel">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_median_meta_modsel</span><span class="p">():</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> This test checks the median-meta quantifier with model selection</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
|
||||||
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">'kindle'</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">15</span><span class="p">]</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">median_mae</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
|
||||||
<span class="n">lr_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)}</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">lr_params</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">APP</span><span class="p">(</span><span class="n">val</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">optimized_median_ave</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">optimized_median_ave</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">assert</span> <span class="n">optimized_median_ave</span> <span class="o"><</span> <span class="n">median_mae</span><span class="p">,</span> <span class="s2">"the optimized method yielded worse performance..."</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,225 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_modsel — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_modsel</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_modsel</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">SVC</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">time</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ModselTestCase">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">ModselTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ModselTestCase.test_modsel">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_modsel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
|
||||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
|
||||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
|
||||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best params'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best score'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_modsel_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="c1"># test = data.test</span>
|
|
||||||
|
|
||||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
|
||||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
|
||||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
|
||||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best params'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best score'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel_speedup">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel_speedup">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_modsel_parallel_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">)}</span>
|
|
||||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="n">GridSearchQ</span><span class="p">(</span>
|
|
||||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
|
||||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="n">tend_nooptim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
|
||||||
|
|
||||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
|
||||||
<span class="n">GridSearchQ</span><span class="p">(</span>
|
|
||||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
|
||||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
|
||||||
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
|
||||||
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'parallel training took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s'</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'sequential training took </span><span class="si">{</span><span class="n">tend_nooptim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_optim</span> <span class="o"><</span> <span class="p">(</span><span class="mf">0.5</span><span class="o">*</span><span class="n">tend_nooptim</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_timeout">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_timeout">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_modsel_timeout</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
|
||||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">time</span>
|
|
||||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span>
|
|
||||||
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|
||||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="c1"># test = data.test</span>
|
|
||||||
|
|
||||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
|
||||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
|
||||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">TimeoutError</span><span class="p">):</span>
|
|
||||||
<span class="n">q</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,336 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_protocols — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_protocols</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_protocols</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span><span class="p">,</span> <span class="n">NPP</span><span class="p">,</span> <span class="n">UPP</span><span class="p">,</span> <span class="n">DomainMixer</span><span class="p">,</span> <span class="n">AbstractStochasticSeededProtocol</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="mock_labelled_collection">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.mock_labelled_collection">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">mock_labelled_collection</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span>
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="p">[</span><span class="n">prefix</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">y</span><span class="p">)]</span>
|
|
||||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="samples_to_str">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.samples_to_str">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">samples_to_str</span><span class="p">(</span><span class="n">protocol</span><span class="p">):</span>
|
|
||||||
<span class="n">samples_str</span> <span class="o">=</span> <span class="s2">""</span>
|
|
||||||
<span class="k">for</span> <span class="n">instances</span><span class="p">,</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">protocol</span><span class="p">():</span>
|
|
||||||
<span class="n">samples_str</span> <span class="o">+=</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">instances</span><span class="si">}</span><span class="se">\t</span><span class="si">{</span><span class="n">prev</span><span class="si">}</span><span class="se">\n</span><span class="s1">'</span>
|
|
||||||
<span class="k">return</span> <span class="n">samples_str</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">TestProtocols</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_app_sanity_check">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_sanity_check">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_app_sanity_check</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">101</span>
|
|
||||||
<span class="n">repeats</span> <span class="o">=</span> <span class="mi">10</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
<span class="n">n_combinations</span> <span class="o">=</span> \
|
|
||||||
<span class="n">quapy</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="n">n_combinations</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_app_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_app_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_app_not_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_not_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_app_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_app_number">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_number">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_app_number</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="c1"># surprisingly enough, for some n_prevalences the test fails, notwithstanding</span>
|
|
||||||
<span class="c1"># everything is correct. The problem is that in function APP.prevalence_grid()</span>
|
|
||||||
<span class="c1"># there is sometimes one rounding error that gets accumulated and</span>
|
|
||||||
<span class="c1"># surpasses 1.0 (by a very small float value, 0.0000000000002 or the like)</span>
|
|
||||||
<span class="c1"># so these tuples are mistakenly removed... I have tried with np.close, and</span>
|
|
||||||
<span class="c1"># other workarounds, but eventually it happens that there is some negative probability</span>
|
|
||||||
<span class="c1"># in the sampling function...</span>
|
|
||||||
|
|
||||||
<span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
|
|
||||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
|
|
||||||
<span class="n">count</span><span class="o">+=</span><span class="mi">1</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">count</span><span class="p">,</span> <span class="n">p</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_npp_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_npp_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_npp_not_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_not_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_npp_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_kraemer_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_kraemer_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_kraemer_not_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_not_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_kraemer_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_covariate_shift_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domA'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domB'</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_not_replicate">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_not_replicate">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_covariate_shift_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domA'</span><span class="p">)</span>
|
|
||||||
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domB'</span><span class="p">)</span>
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="TestProtocols.test_no_seed_init">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_no_seed_init">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_no_seed_init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="k">class</span> <span class="nc">NoSeedInit</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
<span class="c1"># return a matrix containing sampling indexes in the rows</span>
|
|
||||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> <span class="mi">10</span><span class="o">*</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
|
||||||
<span class="n">index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NoSeedInit</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="c1"># this should raise a ValueError, since the class is said to be AbstractStochasticSeededProtocol but the</span>
|
|
||||||
<span class="c1"># random_seed has never been passed to super(NoSeedInit, self).__init__(random_seed)</span>
|
|
||||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">ValueError</span><span class="p">):</span>
|
|
||||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
|
|
||||||
<span class="k">pass</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'done'</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,225 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.tests.test_replicability — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
|
||||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
||||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
|
||||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
|
||||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
||||||
<script src="../../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.tests.test_replicability</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.tests.test_replicability</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MyTestCase">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase">[docs]</a>
|
|
||||||
<span class="k">class</span> <span class="nc">MyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MyTestCase.test_prediction_replicability">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_prediction_replicability">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_prediction_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">'yeast'</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
|
|
||||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
|
|
||||||
<span class="n">prev</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">str_prev1</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
|
|
||||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
|
|
||||||
<span class="n">prev2</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
|
||||||
<span class="n">str_prev2</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">str_prev1</span><span class="p">,</span> <span class="n">str_prev2</span><span class="p">)</span> <span class="c1"># add assertion here</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MyTestCase.test_samping_replicability">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_samping_replicability">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_samping_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">equal_collections</span><span class="p">(</span><span class="n">c1</span><span class="p">,</span> <span class="n">c2</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">Xtr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">Xtr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">ytr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">ytr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">value</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">X</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="nb">range</span><span class="p">(</span><span class="mi">100</span><span class="p">)))</span>
|
|
||||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
|
|
||||||
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
|
||||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="MyTestCase.test_parallel_replicability">
|
|
||||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_parallel_replicability">[docs]</a>
|
|
||||||
<span class="k">def</span> <span class="nf">test_parallel_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="s1">'dry-bean'</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
|
||||||
|
|
||||||
<span class="n">test</span> <span class="o">=</span> <span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">])</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
|
|
||||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
|
||||||
<span class="n">prev1</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
|
||||||
<span class="n">prev2</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
|
||||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
|
||||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
|
||||||
<span class="n">prev3</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
|
||||||
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev1</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev2</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev3</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">prev1</span><span class="p">,</span> <span class="n">prev2</span><span class="p">)</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prev3</span><span class="p">)</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
|
||||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -1,402 +0,0 @@
|
||||||
<!DOCTYPE html>
|
|
||||||
<html class="writer-html5" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta charset="utf-8" />
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
||||||
<title>quapy.util — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
|
||||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
|
||||||
|
|
||||||
|
|
||||||
<!--[if lt IE 9]>
|
|
||||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
|
||||||
<![endif]-->
|
|
||||||
|
|
||||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
|
||||||
<script src="../../_static/jquery.js"></script>
|
|
||||||
<script src="../../_static/underscore.js"></script>
|
|
||||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
|
||||||
<script src="../../_static/doctools.js"></script>
|
|
||||||
<script src="../../_static/sphinx_highlight.js"></script>
|
|
||||||
<script src="../../_static/js/theme.js"></script>
|
|
||||||
<link rel="index" title="Index" href="../../genindex.html" />
|
|
||||||
<link rel="search" title="Search" href="../../search.html" />
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body class="wy-body-for-nav">
|
|
||||||
<div class="wy-grid-for-nav">
|
|
||||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
||||||
<div class="wy-side-scroll">
|
|
||||||
<div class="wy-side-nav-search" >
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<a href="../../index.html" class="icon icon-home">
|
|
||||||
QuaPy: A Python-based open-source framework for quantification
|
|
||||||
</a>
|
|
||||||
<div role="search">
|
|
||||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
||||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
||||||
<input type="hidden" name="check_keywords" value="yes" />
|
|
||||||
<input type="hidden" name="area" value="default" />
|
|
||||||
</form>
|
|
||||||
</div>
|
|
||||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
||||||
<ul>
|
|
||||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
||||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
||||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
|
||||||
</nav>
|
|
||||||
|
|
||||||
<div class="wy-nav-content">
|
|
||||||
<div class="rst-content">
|
|
||||||
<div role="navigation" aria-label="Page navigation">
|
|
||||||
<ul class="wy-breadcrumbs">
|
|
||||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
||||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
||||||
<li class="breadcrumb-item active">quapy.util</li>
|
|
||||||
<li class="wy-breadcrumbs-aside">
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
<hr/>
|
|
||||||
</div>
|
|
||||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
||||||
<div itemprop="articleBody">
|
|
||||||
|
|
||||||
<h1>Source code for quapy.util</h1><div class="highlight"><pre>
|
|
||||||
<span></span><span class="kn">import</span> <span class="nn">contextlib</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">itertools</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">multiprocessing</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">os</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">pickle</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">urllib</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
|
||||||
|
|
||||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
|
|
||||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
|
||||||
<span class="kn">import</span> <span class="nn">signal</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_get_parallel_slices</span><span class="p">(</span><span class="n">n_tasks</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">n_jobs</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
|
||||||
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()</span>
|
|
||||||
<span class="n">batch</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">n_tasks</span> <span class="o">/</span> <span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="n">remainder</span> <span class="o">=</span> <span class="n">n_tasks</span> <span class="o">%</span> <span class="n">n_jobs</span>
|
|
||||||
<span class="k">return</span> <span class="p">[</span><span class="nb">slice</span><span class="p">(</span><span class="n">job</span> <span class="o">*</span> <span class="n">batch</span><span class="p">,</span> <span class="p">(</span><span class="n">job</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">batch</span> <span class="o">+</span> <span class="p">(</span><span class="n">remainder</span> <span class="k">if</span> <span class="n">job</span> <span class="o">==</span> <span class="n">n_jobs</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">else</span> <span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">job</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)]</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="map_parallel"><a class="viewcode-back" href="../../quapy.html#quapy.util.map_parallel">[docs]</a><span class="k">def</span> <span class="nf">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then</span>
|
|
||||||
<span class="sd"> func is applied in two parallel processes to args[0:49] and to args[49:99]. func is a function</span>
|
|
||||||
<span class="sd"> that already works with a list of arguments.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param func: function to be parallelized</span>
|
|
||||||
<span class="sd"> :param args: array-like of arguments to be passed to the function in different parallel calls</span>
|
|
||||||
<span class="sd"> :param n_jobs: the number of workers</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">args</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
|
||||||
<span class="n">slices</span> <span class="o">=</span> <span class="n">_get_parallel_slices</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">),</span> <span class="n">n_jobs</span><span class="p">)</span>
|
|
||||||
<span class="n">results</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)(</span>
|
|
||||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">args</span><span class="p">[</span><span class="n">slice_i</span><span class="p">])</span> <span class="k">for</span> <span class="n">slice_i</span> <span class="ow">in</span> <span class="n">slices</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="o">.</span><span class="n">from_iterable</span><span class="p">(</span><span class="n">results</span><span class="p">))</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="parallel"><a class="viewcode-back" href="../../quapy.html#quapy.util.parallel">[docs]</a><span class="k">def</span> <span class="nf">parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">asarray</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">'loky'</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A wrapper of multiprocessing:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> Parallel(n_jobs=n_jobs)(</span>
|
|
||||||
<span class="sd"> >>> delayed(func)(args_i) for args_i in args</span>
|
|
||||||
<span class="sd"> >>> )</span>
|
|
||||||
|
|
||||||
<span class="sd"> that takes the `quapy.environ` variable as input silently.</span>
|
|
||||||
<span class="sd"> Seeds the child processes to ensure reproducibility when n_jobs>1.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param func: callable</span>
|
|
||||||
<span class="sd"> :param args: args of func</span>
|
|
||||||
<span class="sd"> :param seed: the numeric seed</span>
|
|
||||||
<span class="sd"> :param asarray: set to True to return a np.ndarray instead of a list</span>
|
|
||||||
<span class="sd"> :param backend: indicates the backend used for handling parallel works</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="nf">func_dec</span><span class="p">(</span><span class="n">environ</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span> <span class="o">=</span> <span class="n">environ</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'N_JOBS'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
|
||||||
<span class="c1">#set a context with a temporary seed to ensure results are reproducible in parallel</span>
|
|
||||||
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="n">seed</span><span class="p">))</span>
|
|
||||||
<span class="k">return</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="n">out</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="n">backend</span><span class="p">)(</span>
|
|
||||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func_dec</span><span class="p">)(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">,</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="p">,</span> <span class="n">args_i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">args_i</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
|
||||||
<span class="p">)</span>
|
|
||||||
<span class="k">if</span> <span class="n">asarray</span><span class="p">:</span>
|
|
||||||
<span class="n">out</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">out</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="temp_seed"><a class="viewcode-back" href="../../quapy.html#quapy.util.temp_seed">[docs]</a><span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
|
|
||||||
<span class="k">def</span> <span class="nf">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Can be used in a "with" context to set a temporary seed without modifying the outer numpy's current state. E.g.:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> with temp_seed(random_seed):</span>
|
|
||||||
<span class="sd"> >>> pass # do any computation depending on np.random functionality</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param random_state: the seed to set within the "with" context</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">state</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">get_state</span><span class="p">()</span>
|
|
||||||
<span class="c1">#save the seed just in case it is needed (for instance, for setting the seed of child processes)</span>
|
|
||||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'_R_SEED'</span><span class="p">]</span> <span class="o">=</span> <span class="n">random_state</span>
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
|
||||||
<span class="k">try</span><span class="p">:</span>
|
|
||||||
<span class="k">yield</span>
|
|
||||||
<span class="k">finally</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="download_file"><a class="viewcode-back" href="../../quapy.html#quapy.util.download_file">[docs]</a><span class="k">def</span> <span class="nf">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Downloads a file from a url</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param url: the url</span>
|
|
||||||
<span class="sd"> :param archive_filename: destination filename</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">def</span> <span class="nf">progress</span><span class="p">(</span><span class="n">blocknum</span><span class="p">,</span> <span class="n">bs</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
|
|
||||||
<span class="n">total_sz_mb</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%.2f</span><span class="s1"> MB'</span> <span class="o">%</span> <span class="p">(</span><span class="n">size</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
|
|
||||||
<span class="n">current_sz_mb</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%.2f</span><span class="s1"> MB'</span> <span class="o">%</span> <span class="p">((</span><span class="n">blocknum</span> <span class="o">*</span> <span class="n">bs</span><span class="p">)</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'</span><span class="se">\r</span><span class="s1">downloaded </span><span class="si">%s</span><span class="s1"> / </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">current_sz_mb</span><span class="p">,</span> <span class="n">total_sz_mb</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Downloading </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">url</span><span class="p">)</span>
|
|
||||||
<span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlretrieve</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="n">archive_filename</span><span class="p">,</span> <span class="n">reporthook</span><span class="o">=</span><span class="n">progress</span><span class="p">)</span>
|
|
||||||
<span class="nb">print</span><span class="p">(</span><span class="s2">""</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="download_file_if_not_exists"><a class="viewcode-back" href="../../quapy.html#quapy.util.download_file_if_not_exists">[docs]</a><span class="k">def</span> <span class="nf">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Downloads a file (using :meth:`download_file`) if the file does not exist.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param url: the url</span>
|
|
||||||
<span class="sd"> :param archive_filename: destination filename</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span>
|
|
||||||
<span class="n">create_if_not_exist</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">))</span>
|
|
||||||
<span class="n">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="create_if_not_exist"><a class="viewcode-back" href="../../quapy.html#quapy.util.create_if_not_exist">[docs]</a><span class="k">def</span> <span class="nf">create_if_not_exist</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> An alias to `os.makedirs(path, exist_ok=True)` that also returns the path. This is useful in cases like, e.g.:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: path to create</span>
|
|
||||||
<span class="sd"> :return: the path itself</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">path</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="get_quapy_home"><a class="viewcode-back" href="../../quapy.html#quapy.util.get_quapy_home">[docs]</a><span class="k">def</span> <span class="nf">get_quapy_home</span><span class="p">():</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets.</span>
|
|
||||||
<span class="sd"> This directory is `~/quapy_data`</span>
|
|
||||||
|
|
||||||
<span class="sd"> :return: a string representing the path</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">home</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="o">.</span><span class="n">home</span><span class="p">()),</span> <span class="s1">'quapy_data'</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">home</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">home</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="create_parent_dir"><a class="viewcode-back" href="../../quapy.html#quapy.util.create_parent_dir">[docs]</a><span class="k">def</span> <span class="nf">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Creates the parent dir (if any) of a given path, if it does not exist. E.g., for `./path/to/file.txt`, the path `./path/to`</span>
|
|
||||||
<span class="sd"> is created.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: the path</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">parentdir</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span>
|
|
||||||
<span class="k">if</span> <span class="n">parentdir</span><span class="p">:</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">parentdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="save_text_file"><a class="viewcode-back" href="../../quapy.html#quapy.util.save_text_file">[docs]</a><span class="k">def</span> <span class="nf">save_text_file</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Saves a text file to disk, given its full path, and creates the parent directory if missing.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param path: path where to save the text file.</span>
|
|
||||||
<span class="sd"> :param text: text to save.</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="n">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
|
||||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">text</span><span class="p">,</span> <span class="s1">'wt'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fout</span><span class="p">:</span>
|
|
||||||
<span class="n">fout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">text</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="pickled_resource"><a class="viewcode-back" href="../../quapy.html#quapy.util.pickled_resource">[docs]</a><span class="k">def</span> <span class="nf">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">generation_func</span><span class="p">:</span><span class="n">callable</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times</span>
|
|
||||||
<span class="sd"> this function is invoked, it loads the pickled resource. Example:</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> def some_array(n): # a mock resource created with one parameter (`n`)</span>
|
|
||||||
<span class="sd"> >>> return np.random.rand(n)</span>
|
|
||||||
<span class="sd"> >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource does not exist: it is created by calling some_array(10)</span>
|
|
||||||
<span class="sd"> >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource exists; it is loaded from './my_array.pkl'</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param pickle_path: the path where to save (first time) and load (next times) the resource</span>
|
|
||||||
<span class="sd"> :param generation_func: the function that generates the resource, in case it does not exist in pickle_path</span>
|
|
||||||
<span class="sd"> :param args: any arg that generation_func uses for generating the resources</span>
|
|
||||||
<span class="sd"> :return: the resource</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">pickle_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">return</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">):</span>
|
|
||||||
<span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">))</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="n">instance</span> <span class="o">=</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
|
||||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
||||||
<span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">),</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
|
|
||||||
<span class="k">return</span> <span class="n">instance</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="nf">_check_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">):</span>
|
|
||||||
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
||||||
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> \
|
|
||||||
<span class="s1">'error: sample_size set to None, and cannot be resolved from the environment'</span>
|
|
||||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span>
|
|
||||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sample_size</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> \
|
|
||||||
<span class="s1">'error: sample_size is not a positive integer'</span>
|
|
||||||
<span class="k">return</span> <span class="n">sample_size</span>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="EarlyStop"><a class="viewcode-back" href="../../quapy.html#quapy.util.EarlyStop">[docs]</a><span class="k">class</span> <span class="nc">EarlyStop</span><span class="p">:</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> A class implementing the early-stopping condition typically used for training neural networks.</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> earlystop = EarlyStop(patience=2, lower_is_better=True)</span>
|
|
||||||
<span class="sd"> >>> earlystop(0.9, epoch=0)</span>
|
|
||||||
<span class="sd"> >>> earlystop(0.7, epoch=1)</span>
|
|
||||||
<span class="sd"> >>> earlystop.IMPROVED # is True</span>
|
|
||||||
<span class="sd"> >>> earlystop(1.0, epoch=2)</span>
|
|
||||||
<span class="sd"> >>> earlystop.STOP # is False (patience=1)</span>
|
|
||||||
<span class="sd"> >>> earlystop(1.0, epoch=3)</span>
|
|
||||||
<span class="sd"> >>> earlystop.STOP # is True (patience=0)</span>
|
|
||||||
<span class="sd"> >>> earlystop.best_epoch # is 1</span>
|
|
||||||
<span class="sd"> >>> earlystop.best_score # is 0.7</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a</span>
|
|
||||||
<span class="sd"> held-out validation split) can be found to be worse than the best one obtained so far, before flagging the</span>
|
|
||||||
<span class="sd"> stopping condition. An instance of this class is `callable`, and is to be used as shown in the example above.</span>
|
|
||||||
<span class="sd"> :param lower_is_better: if True (default) the metric is to be minimized.</span>
|
|
||||||
<span class="sd"> :ivar best_score: keeps track of the best value seen so far</span>
|
|
||||||
<span class="sd"> :ivar best_epoch: keeps track of the epoch in which the best score was set</span>
|
|
||||||
<span class="sd"> :ivar STOP: flag (boolean) indicating the stopping condition</span>
|
|
||||||
<span class="sd"> :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
|
||||||
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span> <span class="o">=</span> <span class="n">patience</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">better</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">a</span><span class="p">,</span><span class="n">b</span><span class="p">:</span> <span class="n">a</span><span class="o"><</span><span class="n">b</span> <span class="k">if</span> <span class="n">lower_is_better</span> <span class="k">else</span> <span class="n">a</span><span class="o">></span><span class="n">b</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="kc">None</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">False</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="kc">False</span>
|
|
||||||
|
|
||||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">watch_score</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Commits the new score found in epoch `epoch`. If the score improves over the best score found so far, then</span>
|
|
||||||
<span class="sd">        the patience counter gets reset. If otherwise, the patience counter is decreased, and in case it reaches 0,</span>
|
|
||||||
<span class="sd"> the flag STOP becomes True.</span>
|
|
||||||
|
|
||||||
<span class="sd"> :param watch_score: the new score</span>
|
|
||||||
<span class="sd"> :param epoch: the current epoch</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">better</span><span class="p">(</span><span class="n">watch_score</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score</span><span class="p">))</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="n">watch_score</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="n">epoch</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span>
|
|
||||||
<span class="k">else</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">-=</span> <span class="mi">1</span>
|
|
||||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">True</span></div>
|
|
||||||
|
|
||||||
|
|
||||||
<div class="viewcode-block" id="timeout"><a class="viewcode-back" href="../../quapy.html#quapy.util.timeout">[docs]</a><span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
|
|
||||||
<span class="k">def</span> <span class="nf">timeout</span><span class="p">(</span><span class="n">seconds</span><span class="p">):</span>
|
|
||||||
<span class="w"> </span><span class="sd">"""</span>
|
|
||||||
<span class="sd"> Opens a context that will launch an exception if not closed after a given number of seconds</span>
|
|
||||||
|
|
||||||
<span class="sd"> >>> def func(start_msg, end_msg):</span>
|
|
||||||
<span class="sd"> >>> print(start_msg)</span>
|
|
||||||
<span class="sd"> >>> sleep(2)</span>
|
|
||||||
<span class="sd"> >>> print(end_msg)</span>
|
|
||||||
<span class="sd"> >>></span>
|
|
||||||
<span class="sd"> >>> with timeout(1):</span>
|
|
||||||
<span class="sd"> >>> func('begin function', 'end function')</span>
|
|
||||||
<span class="sd"> >>> Out[]</span>
|
|
||||||
<span class="sd"> >>> begin function</span>
|
|
||||||
<span class="sd"> >>> TimeoutError</span>
|
|
||||||
|
|
||||||
|
|
||||||
<span class="sd"> :param seconds: number of seconds, set to <=0 to ignore the timer</span>
|
|
||||||
<span class="sd"> """</span>
|
|
||||||
<span class="k">if</span> <span class="n">seconds</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="k">def</span> <span class="nf">handler</span><span class="p">(</span><span class="n">signum</span><span class="p">,</span> <span class="n">frame</span><span class="p">):</span>
|
|
||||||
<span class="k">raise</span> <span class="ne">TimeoutError</span><span class="p">()</span>
|
|
||||||
|
|
||||||
<span class="n">signal</span><span class="o">.</span><span class="n">signal</span><span class="p">(</span><span class="n">signal</span><span class="o">.</span><span class="n">SIGALRM</span><span class="p">,</span> <span class="n">handler</span><span class="p">)</span>
|
|
||||||
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="n">seconds</span><span class="p">)</span>
|
|
||||||
|
|
||||||
<span class="k">yield</span>
|
|
||||||
|
|
||||||
<span class="k">if</span> <span class="n">seconds</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
||||||
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span></div>
|
|
||||||
|
|
||||||
</pre></div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<footer>
|
|
||||||
|
|
||||||
<hr/>
|
|
||||||
|
|
||||||
<div role="contentinfo">
|
|
||||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
||||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
||||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
||||||
|
|
||||||
|
|
||||||
</footer>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</section>
|
|
||||||
</div>
|
|
||||||
<script>
|
|
||||||
jQuery(function () {
|
|
||||||
SphinxRtdTheme.Navigation.enable(true);
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
@ -0,0 +1,356 @@
|
||||||
|
# Datasets
|
||||||
|
|
||||||
|
QuaPy makes available several datasets that have been used in
|
||||||
|
quantification literature, as well as an interface to allow
|
||||||
|
anyone to import their custom datasets.
|
||||||
|
|
||||||
|
A _Dataset_ object in QuaPy is roughly a pair of _LabelledCollection_ objects,
|
||||||
|
one playing the role of the training set, another the test set.
|
||||||
|
_LabelledCollection_ is a data class consisting of the (iterable)
|
||||||
|
instances and labels. This class handles most of the sampling functionality in QuaPy.
|
||||||
|
Take a look at the following code:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
import quapy.functional as F
|
||||||
|
|
||||||
|
instances = [
|
||||||
|
'1st positive document', '2nd positive document',
|
||||||
|
'the only negative document',
|
||||||
|
'1st neutral document', '2nd neutral document', '3rd neutral document'
|
||||||
|
]
|
||||||
|
labels = [2, 2, 0, 1, 1, 1]
|
||||||
|
|
||||||
|
data = qp.data.LabelledCollection(instances, labels)
|
||||||
|
print(F.strprev(data.prevalence(), prec=2))
|
||||||
|
```
|
||||||
|
|
||||||
|
Output the class prevalences (showing 2 digit precision):
|
||||||
|
```
|
||||||
|
[0.17, 0.50, 0.33]
|
||||||
|
```
|
||||||
|
|
||||||
|
One can easily produce new samples at desired class prevalence values:
|
||||||
|
|
||||||
|
```python
|
||||||
|
sample_size = 10
|
||||||
|
prev = [0.4, 0.1, 0.5]
|
||||||
|
sample = data.sampling(sample_size, *prev)
|
||||||
|
|
||||||
|
print('instances:', sample.instances)
|
||||||
|
print('labels:', sample.labels)
|
||||||
|
print('prevalence:', F.strprev(sample.prevalence(), prec=2))
|
||||||
|
```
|
||||||
|
|
||||||
|
Which outputs:
|
||||||
|
```
|
||||||
|
instances: ['the only negative document' '2nd positive document'
|
||||||
|
'2nd positive document' '2nd neutral document' '1st positive document'
|
||||||
|
'the only negative document' 'the only negative document'
|
||||||
|
'the only negative document' '2nd positive document'
|
||||||
|
'1st positive document']
|
||||||
|
labels: [0 2 2 1 2 0 0 0 2 2]
|
||||||
|
prevalence: [0.40, 0.10, 0.50]
|
||||||
|
```
|
||||||
|
|
||||||
|
Samples can be made consistent across different runs (e.g., to test
|
||||||
|
different methods on the same exact samples) by sampling and retaining
|
||||||
|
the indexes, that can then be used to generate the sample:
|
||||||
|
|
||||||
|
```python
|
||||||
|
index = data.sampling_index(sample_size, *prev)
|
||||||
|
for method in methods:
|
||||||
|
sample = data.sampling_from_index(index)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
However, generating samples for evaluation purposes is tackled in QuaPy
|
||||||
|
by means of the evaluation protocols (see the dedicated entries in the Wiki
|
||||||
|
for [evaluation](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation) and
|
||||||
|
[protocols](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols)).
|
||||||
|
|
||||||
|
|
||||||
|
## Reviews Datasets
|
||||||
|
|
||||||
|
Three datasets of reviews about Kindle devices, Harry Potter's series, and
|
||||||
|
the well-known IMDb movie reviews can be fetched using a unified interface.
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
data = qp.datasets.fetch_reviews('kindle')
|
||||||
|
```
|
||||||
|
|
||||||
|
These datasets have been used in:
|
||||||
|
```
|
||||||
|
Esuli, A., Moreo, A., & Sebastiani, F. (2018, October).
|
||||||
|
A recurrent neural network for sentiment quantification.
|
||||||
|
In Proceedings of the 27th ACM International Conference on
|
||||||
|
Information and Knowledge Management (pp. 1775-1778).
|
||||||
|
```
|
||||||
|
|
||||||
|
The list of reviews ids is available in:
|
||||||
|
|
||||||
|
```python
|
||||||
|
qp.datasets.REVIEWS_SENTIMENT_DATASETS
|
||||||
|
```
|
||||||
|
|
||||||
|
Some statistics of the available datasets are summarized below:
|
||||||
|
|
||||||
|
| Dataset | classes | train size | test size | train prev | test prev | type |
|
||||||
|
|---|:---:|:---:|:---:|:---:|:---:|---|
|
||||||
|
| hp | 2 | 9533 | 18399 | [0.018, 0.982] | [0.065, 0.935] | text |
|
||||||
|
| kindle | 2 | 3821 | 21591 | [0.081, 0.919] | [0.063, 0.937] | text |
|
||||||
|
| imdb | 2 | 25000 | 25000 | [0.500, 0.500] | [0.500, 0.500] | text |
|
||||||
|
|
||||||
|
|
||||||
|
## Twitter Sentiment Datasets
|
||||||
|
|
||||||
|
11 Twitter datasets for sentiment analysis.
|
||||||
|
Text is not accessible, and the documents were made available
|
||||||
|
in tf-idf format. Each dataset presents two splits: a train/val
|
||||||
|
split for model selection purposes, and a train+val/test split
|
||||||
|
for model evaluation. The following code exemplifies how to load
|
||||||
|
a twitter dataset for model selection.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
data = qp.datasets.fetch_twitter('gasp', for_model_selection=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
The datasets were used in:
|
||||||
|
|
||||||
|
```
|
||||||
|
Gao, W., & Sebastiani, F. (2015, August).
|
||||||
|
Tweet sentiment: From classification to quantification.
|
||||||
|
In 2015 IEEE/ACM International Conference on Advances in
|
||||||
|
Social Networks Analysis and Mining (ASONAM) (pp. 97-104). IEEE.
|
||||||
|
```
|
||||||
|
|
||||||
|
Three of the datasets (semeval13, semeval14, and semeval15) share the
|
||||||
|
same training set (semeval), meaning that the training split one would get
|
||||||
|
when requesting any of them is the same. The dataset "semeval" can only
|
||||||
|
be requested with "for_model_selection=True".
|
||||||
|
The lists of the Twitter dataset's ids can be consulted in:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# a list of 11 dataset ids that can be used for model selection or model evaluation
|
||||||
|
qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
|
||||||
|
|
||||||
|
# 9 dataset ids in which "semeval13", "semeval14", and "semeval15" are replaced with "semeval"
|
||||||
|
qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
|
||||||
|
```
|
||||||
|
|
||||||
|
Some details can be found below:
|
||||||
|
|
||||||
|
| Dataset | classes | train size | test size | features | train prev | test prev | type |
|
||||||
|
|---|:---:|:---:|:---:|:---:|:---:|:---:|---|
|
||||||
|
| gasp | 3 | 8788 | 3765 | 694582 | [0.421, 0.496, 0.082] | [0.407, 0.507, 0.086] | sparse |
|
||||||
|
| hcr | 3 | 1594 | 798 | 222046 | [0.546, 0.211, 0.243] | [0.640, 0.167, 0.193] | sparse |
|
||||||
|
| omd | 3 | 1839 | 787 | 199151 | [0.463, 0.271, 0.266] | [0.437, 0.283, 0.280] | sparse |
|
||||||
|
| sanders | 3 | 2155 | 923 | 229399 | [0.161, 0.691, 0.148] | [0.164, 0.688, 0.148] | sparse |
|
||||||
|
| semeval13 | 3 | 11338 | 3813 | 1215742 | [0.159, 0.470, 0.372] | [0.158, 0.430, 0.412] | sparse |
|
||||||
|
| semeval14 | 3 | 11338 | 1853 | 1215742 | [0.159, 0.470, 0.372] | [0.109, 0.361, 0.530] | sparse |
|
||||||
|
| semeval15 | 3 | 11338 | 2390 | 1215742 | [0.159, 0.470, 0.372] | [0.153, 0.413, 0.434] | sparse |
|
||||||
|
| semeval16 | 3 | 8000 | 2000 | 889504 | [0.157, 0.351, 0.492] | [0.163, 0.341, 0.497] | sparse |
|
||||||
|
| sst | 3 | 2971 | 1271 | 376132 | [0.261, 0.452, 0.288] | [0.207, 0.481, 0.312] | sparse |
|
||||||
|
| wa | 3 | 2184 | 936 | 248563 | [0.305, 0.414, 0.281] | [0.282, 0.446, 0.272] | sparse |
|
||||||
|
| wb | 3 | 4259 | 1823 | 404333 | [0.270, 0.392, 0.337] | [0.274, 0.392, 0.335] | sparse |
|
||||||
|
|
||||||
|
|
||||||
|
## UCI Machine Learning
|
||||||
|
|
||||||
|
A set of 32 datasets from the [UCI Machine Learning repository](https://archive.ics.uci.edu/ml/datasets.php)
|
||||||
|
used in:
|
||||||
|
|
||||||
|
```
|
||||||
|
Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||||||
|
Using ensembles for problems with characterizable changes
|
||||||
|
in data distribution: A case study on quantification.
|
||||||
|
Information Fusion, 34, 87-100.
|
||||||
|
```
|
||||||
|
|
||||||
|
The list does not exactly coincide with that used in Pérez-Gállego et al. 2017
|
||||||
|
since we were unable to find the datasets with ids "diabetes" and "phoneme".
|
||||||
|
|
||||||
|
These datasets can be loaded by calling, e.g.:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
data = qp.datasets.fetch_UCIDataset('yeast', verbose=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
This call will return a _Dataset_ object in which the training and
|
||||||
|
test splits are randomly drawn, in a stratified manner, from the whole
|
||||||
|
collection at 70% and 30%, respectively. The _verbose=True_ option indicates
|
||||||
|
that the dataset description should be printed in standard output.
|
||||||
|
The original data is not split,
|
||||||
|
and some papers submit the entire collection to a kFCV validation.
|
||||||
|
In order to accommodate these practices, one could first instantiate
|
||||||
|
the entire collection, and then create a generator that will return one
|
||||||
|
training+test dataset at a time, following a kFCV protocol:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
collection = qp.datasets.fetch_UCILabelledCollection("yeast")
|
||||||
|
for data in qp.data.Dataset.kFCV(collection, nfolds=5, nrepeats=2):
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
The above code allows one to conduct a 2x5FCV evaluation on the "yeast" dataset.
|
||||||
|
|
||||||
|
All datasets come in numerical form (dense matrices); some statistics
|
||||||
|
are summarized below.
|
||||||
|
|
||||||
|
| Dataset | classes | instances | features | prev | type |
|
||||||
|
|---|:---:|:---:|:---:|:---:|---|
|
||||||
|
| acute.a | 2 | 120 | 6 | [0.508, 0.492] | dense |
|
||||||
|
| acute.b | 2 | 120 | 6 | [0.583, 0.417] | dense |
|
||||||
|
| balance.1 | 2 | 625 | 4 | [0.539, 0.461] | dense |
|
||||||
|
| balance.2 | 2 | 625 | 4 | [0.922, 0.078] | dense |
|
||||||
|
| balance.3 | 2 | 625 | 4 | [0.539, 0.461] | dense |
|
||||||
|
| breast-cancer | 2 | 683 | 9 | [0.350, 0.650] | dense |
|
||||||
|
| cmc.1 | 2 | 1473 | 9 | [0.573, 0.427] | dense |
|
||||||
|
| cmc.2 | 2 | 1473 | 9 | [0.774, 0.226] | dense |
|
||||||
|
| cmc.3 | 2 | 1473 | 9 | [0.653, 0.347] | dense |
|
||||||
|
| ctg.1 | 2 | 2126 | 22 | [0.222, 0.778] | dense |
|
||||||
|
| ctg.2 | 2 | 2126 | 22 | [0.861, 0.139] | dense |
|
||||||
|
| ctg.3 | 2 | 2126 | 22 | [0.917, 0.083] | dense |
|
||||||
|
| german | 2 | 1000 | 24 | [0.300, 0.700] | dense |
|
||||||
|
| haberman | 2 | 306 | 3 | [0.735, 0.265] | dense |
|
||||||
|
| ionosphere | 2 | 351 | 34 | [0.641, 0.359] | dense |
|
||||||
|
| iris.1 | 2 | 150 | 4 | [0.667, 0.333] | dense |
|
||||||
|
| iris.2 | 2 | 150 | 4 | [0.667, 0.333] | dense |
|
||||||
|
| iris.3 | 2 | 150 | 4 | [0.667, 0.333] | dense |
|
||||||
|
| mammographic | 2 | 830 | 5 | [0.514, 0.486] | dense |
|
||||||
|
| pageblocks.5 | 2 | 5473 | 10 | [0.979, 0.021] | dense |
|
||||||
|
| semeion | 2 | 1593 | 256 | [0.901, 0.099] | dense |
|
||||||
|
| sonar | 2 | 208 | 60 | [0.534, 0.466] | dense |
|
||||||
|
| spambase | 2 | 4601 | 57 | [0.606, 0.394] | dense |
|
||||||
|
| spectf | 2 | 267 | 44 | [0.794, 0.206] | dense |
|
||||||
|
| tictactoe | 2 | 958 | 9 | [0.653, 0.347] | dense |
|
||||||
|
| transfusion | 2 | 748 | 4 | [0.762, 0.238] | dense |
|
||||||
|
| wdbc | 2 | 569 | 30 | [0.627, 0.373] | dense |
|
||||||
|
| wine.1 | 2 | 178 | 13 | [0.669, 0.331] | dense |
|
||||||
|
| wine.2 | 2 | 178 | 13 | [0.601, 0.399] | dense |
|
||||||
|
| wine.3 | 2 | 178 | 13 | [0.730, 0.270] | dense |
|
||||||
|
| wine-q-red | 2 | 1599 | 11 | [0.465, 0.535] | dense |
|
||||||
|
| wine-q-white | 2 | 4898 | 11 | [0.335, 0.665] | dense |
|
||||||
|
| yeast | 2 | 1484 | 8 | [0.711, 0.289] | dense |
|
||||||
|
|
||||||
|
### Issues:
|
||||||
|
All datasets will be downloaded automatically the first time they are requested, and
|
||||||
|
stored in the _quapy_data_ folder for faster further reuse.
|
||||||
|
However, some datasets require special actions that at the moment are not fully
|
||||||
|
automated.
|
||||||
|
|
||||||
|
* Datasets with ids "ctg.1", "ctg.2", and "ctg.3" (_Cardiotocography Data Set_) load
|
||||||
|
an Excel file, which requires the user to install the _xlrd_ Python module in order
|
||||||
|
to open it.
|
||||||
|
* The dataset with id "pageblocks.5" (_Page Blocks Classification (5)_) needs to
|
||||||
|
open a "unix compressed file" (extension .Z), which is not directly doable with
|
||||||
|
standard Python packages like gzip or zip. This file would need to be uncompressed using
|
||||||
|
OS-dependent software manually. Information on how to do it will be printed the first
|
||||||
|
time the dataset is invoked.
|
||||||
|
|
||||||
|
## LeQua Datasets
|
||||||
|
|
||||||
|
QuaPy also provides the datasets used for the LeQua competition.
|
||||||
|
In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification
|
||||||
|
problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide
|
||||||
|
raw documents instead.
|
||||||
|
Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B
|
||||||
|
are multiclass quantification problems consisting of estimating the class prevalence
|
||||||
|
values of 28 different merchandise products.
|
||||||
|
|
||||||
|
Every task consists of a training set, a set of validation samples (for model selection)
|
||||||
|
and a set of test samples (for evaluation). QuaPy returns this data as a LabelledCollection
|
||||||
|
(training) and two generation protocols (for validation and test samples), as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
training, val_generator, test_generator = fetch_lequa2022(task=task)
|
||||||
|
```
|
||||||
|
|
||||||
|
See the `lequa2022_experiments.py` in the examples folder for further details on how to
|
||||||
|
carry out experiments using these datasets.
|
||||||
|
|
||||||
|
The datasets are downloaded only once, and stored for fast reuse.
|
||||||
|
|
||||||
|
Some statistics are summarized below:
|
||||||
|
|
||||||
|
| Dataset | classes | train size | validation samples | test samples | docs by sample | type |
|
||||||
|
|---------|:-------:|:----------:|:------------------:|:------------:|:----------------:|:--------:|
|
||||||
|
| T1A | 2 | 5000 | 1000 | 5000 | 250 | vector |
|
||||||
|
| T1B | 28 | 20000 | 1000 | 5000 | 1000 | vector |
|
||||||
|
| T2A | 2 | 5000 | 1000 | 5000 | 250 | text |
|
||||||
|
| T2B | 28 | 20000 | 1000 | 5000 | 1000 | text |
|
||||||
|
|
||||||
|
For further details on the datasets, we refer to the original
|
||||||
|
[paper](https://ceur-ws.org/Vol-3180/paper-146.pdf):
|
||||||
|
|
||||||
|
```
|
||||||
|
Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022).
|
||||||
|
A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.
|
||||||
|
```
|
||||||
|
|
||||||
|
## Adding Custom Datasets
|
||||||
|
|
||||||
|
QuaPy provides data loaders for simple formats dealing with
|
||||||
|
text, following the format:
|
||||||
|
|
||||||
|
```
|
||||||
|
class-id \t first document's pre-processed text \n
|
||||||
|
class-id \t second document's pre-processed text \n
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
and sparse representations of the form:
|
||||||
|
|
||||||
|
```
|
||||||
|
{-1, 0, or +1} col(int):val(float) col(int):val(float) ... \n
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
The code in charge of loading a LabelledCollection is:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@classmethod
|
||||||
|
def load(cls, path:str, loader_func:callable):
|
||||||
|
return LabelledCollection(*loader_func(path))
|
||||||
|
```
|
||||||
|
|
||||||
|
indicating that any _loader_func_ (e.g., a user-defined one) which
|
||||||
|
returns valid arguments for initializing a _LabelledCollection_ object will allow
|
||||||
|
to load any collection. In particular, the _LabelledCollection_ receives as
|
||||||
|
arguments the instances (as an iterable) and the labels (as an iterable) and,
|
||||||
|
additionally, the number of classes can be specified (it would otherwise be
|
||||||
|
inferred from the labels, but that requires at least one positive example for
|
||||||
|
all classes to be present in the collection).
|
||||||
|
|
||||||
|
The same _loader_func_ can be passed to a Dataset, along with two
|
||||||
|
paths, in order to create a training and test pair of _LabelledCollection_,
|
||||||
|
e.g.:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
|
||||||
|
train_path = '../my_data/train.dat'
|
||||||
|
test_path = '../my_data/test.dat'
|
||||||
|
|
||||||
|
def my_custom_loader(path):
|
||||||
|
with open(path, 'rb') as fin:
|
||||||
|
...
|
||||||
|
return instances, labels
|
||||||
|
|
||||||
|
data = qp.data.Dataset.load(train_path, test_path, my_custom_loader)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Processing
|
||||||
|
|
||||||
|
QuaPy implements a number of preprocessing functions in the package _qp.data.preprocessing_, including:
|
||||||
|
|
||||||
|
* _text2tfidf_: tfidf vectorization
|
||||||
|
* _reduce_columns_: reducing the number of columns based on term frequency
|
||||||
|
* _standardize_: transforms the column values into z-scores (i.e., subtracts the mean and normalizes by the standard deviation, so
|
||||||
|
that the column values have zero mean and unit variance).
|
||||||
|
* _index_: transforms textual tokens into lists of numeric ids
|
||||||
|
|
@ -14,9 +14,19 @@ which are implemented in QuaPy and explained here.
|
||||||
|
|
||||||
## Error Measures
|
## Error Measures
|
||||||
|
|
||||||
The module quapy.error implements the most popular error measures for quantification, e.g., mean absolute error (_mae_), mean relative absolute error (_mrae_), among others. For each such measure (e.g., _mrae_) there are corresponding functions (e.g., _rae_) that do not average the results across samples.
|
The module quapy.error implements the following error measures for quantification:
|
||||||
|
* _mae_: mean absolute error
|
||||||
|
* _mrae_: mean relative absolute error
|
||||||
|
* _mse_: mean squared error
|
||||||
|
* _mkld_: mean Kullback-Leibler Divergence
|
||||||
|
* _mnkld_: mean normalized Kullback-Leibler Divergence
|
||||||
|
|
||||||
Some errors of classification are also available, e.g., accuracy error (_acce_) or F-1 error (_f1e_).
|
Functions _ae_, _rae_, _se_, _kld_, and _nkld_ are also available,
|
||||||
|
which return the individual errors (i.e., without averaging the whole).
|
||||||
|
|
||||||
|
Some errors of classification are also available:
|
||||||
|
* _acce_: accuracy error (1-accuracy)
|
||||||
|
* _f1e_: F-1 score error (1-F1 score)
|
||||||
|
|
||||||
The error functions implement the following interface, e.g.:
|
The error functions implement the following interface, e.g.:
|
||||||
|
|
||||||
|
|
@ -46,18 +56,18 @@ e.g.:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
qp.environ['SAMPLE_SIZE'] = 100 # once for all
|
qp.environ['SAMPLE_SIZE'] = 100 # once for all
|
||||||
true_prev = [0.5, 0.3, 0.2] # let's assume 3 classes
|
true_prev = np.asarray([0.5, 0.3, 0.2]) # let's assume 3 classes
|
||||||
estim_prev = [0.1, 0.3, 0.6]
|
estim_prev = np.asarray([0.1, 0.3, 0.6])
|
||||||
error = qp.error.mrae(true_prev, estim_prev)
|
error = qp.error.mrae(true_prev, estim_prev)
|
||||||
print(f'mrae({true_prev}, {estim_prev}) = {error:.3f}')
|
print(f'mrae({true_prev}, {estim_prev}) = {error:.3f}')
|
||||||
```
|
```
|
||||||
|
|
||||||
will print:
|
will print:
|
||||||
```
|
```
|
||||||
mrae([0.5, 0.3, 0.2], [0.1, 0.3, 0.6]) = 0.914
|
mrae([0.500, 0.300, 0.200], [0.100, 0.300, 0.600]) = 0.914
|
||||||
```
|
```
|
||||||
|
|
||||||
It is also possible to instantiate QuaPy's quantification
|
Finally, it is possible to instantiate QuaPy's quantification
|
||||||
error functions from strings using, e.g.:
|
error functions from strings using, e.g.:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
@ -72,8 +82,8 @@ one specific _sample generation procotol_ to genereate many
|
||||||
samples, typically characterized by widely varying amounts of
|
samples, typically characterized by widely varying amounts of
|
||||||
_shift_ with respect to the original distribution, that are then
|
_shift_ with respect to the original distribution, that are then
|
||||||
used to evaluate the performance of a (trained) quantifier.
|
used to evaluate the performance of a (trained) quantifier.
|
||||||
These protocols are explained in more detail in a dedicated [manual](./protocols.md).
|
These protocols are explained in more detail in a dedicated [entry
|
||||||
For the moment being, let us assume we already have
|
in the wiki](Protocols.md). For the moment being, let us assume we already have
|
||||||
chosen and instantiated one specific such protocol, that we here
|
chosen and instantiated one specific such protocol, that we here
|
||||||
simply call _prot_. Let also assume our model is called
|
simply call _prot_. Let also assume our model is called
|
||||||
_quantifier_ and that our evaluatio measure of choice is
|
_quantifier_ and that our evaluatio measure of choice is
|
||||||
|
|
@ -85,7 +95,7 @@ print(f'MAE = {mae:.4f}')
|
||||||
```
|
```
|
||||||
|
|
||||||
It is often desirable to evaluate our system using more than one
|
It is often desirable to evaluate our system using more than one
|
||||||
single evaluation measure. In this case, it is convenient to generate
|
single evaluatio measure. In this case, it is convenient to generate
|
||||||
a _report_. A report in QuaPy is a dataframe accounting for all the
|
a _report_. A report in QuaPy is a dataframe accounting for all the
|
||||||
true prevalence values with their corresponding prevalence values
|
true prevalence values with their corresponding prevalence values
|
||||||
as estimated by the quantifier, along with the error each has given
|
as estimated by the quantifier, along with the error each has given
|
||||||
|
|
@ -104,7 +114,7 @@ report['estim-prev'] = report['estim-prev'].map(F.strprev)
|
||||||
print(report)
|
print(report)
|
||||||
|
|
||||||
print('Averaged values:')
|
print('Averaged values:')
|
||||||
print(report.mean(numeric_only=True))
|
print(report.mean())
|
||||||
```
|
```
|
||||||
|
|
||||||
This will produce an output like:
|
This will produce an output like:
|
||||||
|
|
@ -141,14 +151,11 @@ true_prevs, estim_prevs = qp.evaluation.prediction(quantifier, protocol=prot)
|
||||||
|
|
||||||
All the evaluation functions implement specific optimizations for speeding-up
|
All the evaluation functions implement specific optimizations for speeding-up
|
||||||
the evaluation of aggregative quantifiers (i.e., of instances of _AggregativeQuantifier_).
|
the evaluation of aggregative quantifiers (i.e., of instances of _AggregativeQuantifier_).
|
||||||
|
|
||||||
The optimization comes down to generating classification predictions (either crisp or soft)
|
The optimization comes down to generating classification predictions (either crisp or soft)
|
||||||
only once for the entire test set, and then applying the sampling procedure to the
|
only once for the entire test set, and then applying the sampling procedure to the
|
||||||
predictions, instead of generating samples of instances and then computing the
|
predictions, instead of generating samples of instances and then computing the
|
||||||
classification predictions every time. This is only possible when the protocol
|
classification predictions every time. This is only possible when the protocol
|
||||||
is an instance of _OnLabelledCollectionProtocol_.
|
is an instance of _OnLabelledCollectionProtocol_. The optimization is only
|
||||||
|
|
||||||
The optimization is only
|
|
||||||
carried out when the number of classification predictions thus generated would be
|
carried out when the number of classification predictions thus generated would be
|
||||||
smaller than the number of predictions required for the entire protocol; e.g.,
|
smaller than the number of predictions required for the entire protocol; e.g.,
|
||||||
if the original dataset contains 1M instances, but the protocol is such that it would
|
if the original dataset contains 1M instances, but the protocol is such that it would
|
||||||
|
|
@ -159,4 +166,4 @@ precompute all the predictions irrespectively of the number of instances and num
|
||||||
Finally, this can be deactivated by setting _aggr_speedup=False_. Note that this optimization
|
Finally, this can be deactivated by setting _aggr_speedup=False_. Note that this optimization
|
||||||
is not only applied for the final evaluation, but also for the internal evaluations carried
|
is not only applied for the final evaluation, but also for the internal evaluations carried
|
||||||
out during _model selection_. Since these are typically many, the heuristic can help reduce the
|
out during _model selection_. Since these are typically many, the heuristic can help reduce the
|
||||||
execution time significatively.
|
execution time a lot.
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
Installation
|
||||||
|
------------
|
||||||
|
|
||||||
|
QuaPy can be easily installed via `pip`
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
pip install quapy
|
||||||
|
|
||||||
|
See `pip page <https://pypi.org/project/QuaPy/>`_ for older versions.
|
||||||
|
|
||||||
|
Requirements
|
||||||
|
************
|
||||||
|
|
||||||
|
* scikit-learn, numpy, scipy
|
||||||
|
* pytorch (for QuaNet)
|
||||||
|
* svmperf patched for quantification (see below)
|
||||||
|
* joblib
|
||||||
|
* tqdm
|
||||||
|
* pandas, xlrd
|
||||||
|
* matplotlib
|
||||||
|
|
||||||
|
|
||||||
|
SVM-perf with quantification-oriented losses
|
||||||
|
********************************************
|
||||||
|
|
||||||
|
In order to run experiments involving SVM(Q), SVM(KLD), SVM(NKLD),
|
||||||
|
SVM(AE), or SVM(RAE), you have to first download the
|
||||||
|
`svmperf <http://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_
|
||||||
|
package, apply the patch
|
||||||
|
`svm-perf-quantification-ext.patch <https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch>`_,
|
||||||
|
and compile the sources.
|
||||||
|
The script
|
||||||
|
`prepare_svmperf.sh <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_
|
||||||
|
does the whole job. Simply run:
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
./prepare_svmperf.sh
|
||||||
|
|
||||||
|
|
||||||
|
The resulting directory `./svm_perf_quantification` contains the
|
||||||
|
patched version of `svmperf` with quantification-oriented losses.
|
||||||
|
|
||||||
|
The
|
||||||
|
`svm-perf-quantification-ext.patch <https://github.com/HLT-ISTI/QuaPy/blob/master/svm-perf-quantification-ext.patch>`_
|
||||||
|
is an extension of the patch made available by
|
||||||
|
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`_
|
||||||
|
that allows SVMperf to optimize for
|
||||||
|
the `Q` measure as proposed by
|
||||||
|
`Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`_
|
||||||
|
and for the `KLD` and `NKLD` as proposed by
|
||||||
|
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`_
|
||||||
|
for quantification.
|
||||||
|
This patch extends the former by also allowing SVMperf to optimize for
|
||||||
|
`AE` and `RAE`.
|
||||||
|
|
@ -0,0 +1,438 @@
|
||||||
|
# Quantification Methods
|
||||||
|
|
||||||
|
Quantification methods can be categorized as belonging to
|
||||||
|
_aggregative_ and _non-aggregative_ groups.
|
||||||
|
Most methods included in QuaPy at the moment are of type _aggregative_
|
||||||
|
(though we plan to add many more methods in the near future), i.e.,
|
||||||
|
are methods characterized by the fact that
|
||||||
|
quantification is performed as an aggregation function of the individual
|
||||||
|
products of classification.
|
||||||
|
|
||||||
|
Any quantifier in QuaPy should extend the class _BaseQuantifier_,
|
||||||
|
and implement some abstract methods:
|
||||||
|
```python
|
||||||
|
@abstractmethod
|
||||||
|
def fit(self, data: LabelledCollection): ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def quantify(self, instances): ...
|
||||||
|
```
|
||||||
|
The meaning of those functions should be familiar to those
|
||||||
|
used to work with scikit-learn since the class structure of QuaPy
|
||||||
|
is directly inspired by scikit-learn's _Estimators_. Functions
|
||||||
|
_fit_ and _quantify_ are used to train the model and to provide
|
||||||
|
class estimations (the reason why
|
||||||
|
scikit-learn's structure has not been adopted _as is_ in QuaPy responds to
|
||||||
|
the fact that scikit-learn's _predict_ function is expected to return
|
||||||
|
one output for each input element --e.g., a predicted label for each
|
||||||
|
instance in a sample-- while in quantification the output for a sample
|
||||||
|
is one single array of class prevalences).
|
||||||
|
Quantifiers also extend from scikit-learn's `BaseEstimator`, in order
|
||||||
|
to simplify the use of _set_params_ and _get_params_ used in
|
||||||
|
[model selector](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection).
|
||||||
|
|
||||||
|
## Aggregative Methods
|
||||||
|
|
||||||
|
All quantification methods are implemented as part of the
|
||||||
|
_qp.method_ package. In particular, _aggregative_ methods are defined in
|
||||||
|
_qp.method.aggregative_, and extend _AggregativeQuantifier(BaseQuantifier)_.
|
||||||
|
The methods that any _aggregative_ quantifier must implement are:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@abstractmethod
|
||||||
|
def fit(self, data: LabelledCollection, fit_learner=True): ...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def aggregate(self, classif_predictions:np.ndarray): ...
|
||||||
|
```
|
||||||
|
|
||||||
|
since, as mentioned before, aggregative methods base their prediction on the
|
||||||
|
individual predictions of a classifier. Indeed, a default implementation
|
||||||
|
of _BaseQuantifier.quantify_ is already provided, which looks like:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def quantify(self, instances):
|
||||||
|
classif_predictions = self.classify(instances)
|
||||||
|
return self.aggregate(classif_predictions)
|
||||||
|
```
|
||||||
|
Aggregative quantifiers are expected to maintain a classifier (which is
|
||||||
|
accessed through the _@property_ _classifier_). This classifier is
|
||||||
|
given as input to the quantifier, and can be already fit
|
||||||
|
on external data (in which case, the _fit_learner_ argument should
|
||||||
|
be set to False), or be fit by the quantifier's fit (default).
|
||||||
|
|
||||||
|
Another class of _aggregative_ methods are the _probabilistic_
|
||||||
|
aggregative methods, that should inherit from the abstract class
|
||||||
|
_AggregativeProbabilisticQuantifier(AggregativeQuantifier)_.
|
||||||
|
The particularity of _probabilistic_ aggregative methods (w.r.t.
|
||||||
|
non-probabilistic ones), is that the default quantifier is defined
|
||||||
|
in terms of the posterior probabilities returned by a probabilistic
|
||||||
|
classifier, and not by the crisp decisions of a hard classifier.
|
||||||
|
In any case, the interface _classify(instances)_ remains unchanged.
|
||||||
|
|
||||||
|
One advantage of _aggregative_ methods (either probabilistic or not)
|
||||||
|
is that the evaluation according to any sampling procedure (e.g.,
|
||||||
|
the [artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation))
|
||||||
|
can be achieved very efficiently, since the entire set can be pre-classified
|
||||||
|
once, and the quantification estimations for different samples can directly
|
||||||
|
reuse these predictions, without having to classify each element every time.
|
||||||
|
QuaPy leverages this property to speed up any procedure having to do with
|
||||||
|
quantification over samples, as is customarily done in model selection or
|
||||||
|
in evaluation.
|
||||||
|
|
||||||
|
### The Classify & Count variants
|
||||||
|
|
||||||
|
QuaPy implements the four CC variants, i.e.:
|
||||||
|
|
||||||
|
* _CC_ (Classify & Count), the simplest aggregative quantifier; one that
|
||||||
|
simply relies on the label predictions of a classifier to deliver class estimates.
|
||||||
|
* _ACC_ (Adjusted Classify & Count), the adjusted variant of CC.
|
||||||
|
* _PCC_ (Probabilistic Classify & Count), the probabilistic variant of CC that
|
||||||
|
relies on the soft estimations (or posterior probabilities) returned by a (probabilistic) classifier.
|
||||||
|
* _PACC_ (Probabilistic Adjusted Classify & Count), the adjusted variant of PCC.
|
||||||
|
|
||||||
|
The following code serves as a complete example using CC equipped
|
||||||
|
with an SVM as the classifier:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
import quapy.functional as F
|
||||||
|
from sklearn.svm import LinearSVC
|
||||||
|
|
||||||
|
training, test = qp.datasets.fetch_twitter('hcr', pickle=True).train_test
|
||||||
|
|
||||||
|
# instantiate a classifier learner, in this case a SVM
|
||||||
|
svm = LinearSVC()
|
||||||
|
|
||||||
|
# instantiate a Classify & Count with the SVM
|
||||||
|
# (an alias is available in qp.method.aggregative.ClassifyAndCount)
|
||||||
|
model = qp.method.aggregative.CC(svm)
|
||||||
|
model.fit(training)
|
||||||
|
estim_prevalence = model.quantify(test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
The same code could be used to instantiate an ACC, by simply replacing
|
||||||
|
the instantiation of the model with:
|
||||||
|
```python
|
||||||
|
model = qp.method.aggregative.ACC(svm)
|
||||||
|
```
|
||||||
|
Note that the adjusted variants (ACC and PACC) need to estimate
|
||||||
|
some parameters for performing the adjustment (e.g., the
|
||||||
|
_true positive rate_ and the _false positive rate_ in case of
|
||||||
|
binary classification) that are estimated on a validation split
|
||||||
|
of the labelled set. In this case, the `__init__` method of
|
||||||
|
ACC defines an additional parameter, _val_split_ which, by
|
||||||
|
default, is set to 0.4, so that 40% of the labelled data
|
||||||
|
will be used for estimating the parameters for adjusting the
|
||||||
|
predictions. This parameter can also be set to an integer,
|
||||||
|
indicating that the parameters should be estimated by means of
|
||||||
|
_k_-fold cross-validation, for which the integer indicates the
|
||||||
|
number _k_ of folds. Finally, _val_split_ can be set to a
|
||||||
|
specific held-out validation set (i.e., an instance of _LabelledCollection_).
|
||||||
|
|
||||||
|
The specification of _val_split_ can be
|
||||||
|
postponed to the invocation of the fit method (if _val_split_ was also
|
||||||
|
set in the constructor, the one specified at fit time would prevail),
|
||||||
|
e.g.:
|
||||||
|
|
||||||
|
```python
|
||||||
|
model = qp.method.aggregative.ACC(svm)
|
||||||
|
# perform 5-fold cross validation for estimating ACC's parameters
|
||||||
|
# (overrides the default val_split=0.4 in the constructor)
|
||||||
|
model.fit(training, val_split=5)
|
||||||
|
```
|
||||||
|
|
||||||
|
The following code illustrates the case in which PCC is used:
|
||||||
|
|
||||||
|
```python
|
||||||
|
model = qp.method.aggregative.PCC(svm)
|
||||||
|
model.fit(training)
|
||||||
|
estim_prevalence = model.quantify(test.instances)
|
||||||
|
print('classifier:', model.classifier)
|
||||||
|
```
|
||||||
|
In this case, QuaPy will print:
|
||||||
|
```
|
||||||
|
The learner LinearSVC does not seem to be probabilistic. The learner will be calibrated.
|
||||||
|
classifier: CalibratedClassifierCV(base_estimator=LinearSVC(), cv=5)
|
||||||
|
```
|
||||||
|
The first output indicates that the learner (_LinearSVC_ in this case)
|
||||||
|
is not a probabilistic classifier (i.e., it does not implement the
|
||||||
|
_predict_proba_ method) and so, the classifier will be converted to
|
||||||
|
a probabilistic one through [calibration](https://scikit-learn.org/stable/modules/calibration.html).
|
||||||
|
As a result, the classifier that is printed in the second line points
|
||||||
|
to a _CalibratedClassifierCV_ instance. Note that calibration can only
|
||||||
|
be applied to hard classifiers when _fit_learner=True_; an exception
|
||||||
|
will be raised otherwise.
|
||||||
|
|
||||||
|
Lastly, everything we said about ACC and PCC
|
||||||
|
applies to PACC as well.
|
||||||
|
|
||||||
|
|
||||||
|
### Expectation Maximization (EMQ)
|
||||||
|
|
||||||
|
The Expectation Maximization Quantifier (EMQ), also known as
|
||||||
|
the SLD, is available at _qp.method.aggregative.EMQ_ or via the
|
||||||
|
alias _qp.method.aggregative.ExpectationMaximizationQuantifier_.
|
||||||
|
The method is described in:
|
||||||
|
|
||||||
|
_Saerens, M., Latinne, P., and Decaestecker, C. (2002). Adjusting the outputs of a classifier
|
||||||
|
to new a priori probabilities: A simple procedure. Neural Computation, 14(1):21–41._
|
||||||
|
|
||||||
|
EMQ works with a probabilistic classifier (if the classifier
|
||||||
|
given as input is a hard one, a calibration will be attempted).
|
||||||
|
Although this method was originally proposed for improving the
|
||||||
|
posterior probabilities of a probabilistic classifier, and not
|
||||||
|
for improving the estimation of prior probabilities, EMQ ranks
|
||||||
|
almost always among the most effective quantifiers in the
|
||||||
|
experiments we have carried out.
|
||||||
|
|
||||||
|
An example of use can be found below:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
|
||||||
|
|
||||||
|
model = qp.method.aggregative.EMQ(LogisticRegression())
|
||||||
|
model.fit(dataset.training)
|
||||||
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
_New in v0.1.7_: EMQ now accepts two new parameters in the construction method, namely
|
||||||
|
_exact_train_prev_, which allows one to use the true training prevalence as the initial
|
||||||
|
prevalence estimation (default behaviour), or instead an approximation of it as
|
||||||
|
suggested by [Alexandari et al. (2020)](http://proceedings.mlr.press/v119/alexandari20a.html)
|
||||||
|
(by setting _exact_train_prev=False_).
|
||||||
|
The other parameter is _recalib_, which allows one to indicate a calibration method, among those
|
||||||
|
proposed by [Alexandari et al. (2020)](http://proceedings.mlr.press/v119/alexandari20a.html),
|
||||||
|
including the Bias-Corrected Temperature Scaling, Vector Scaling, etc.
|
||||||
|
See the API documentation for further details.
|
||||||
|
|
||||||
|
|
||||||
|
### Hellinger Distance y (HDy)
|
||||||
|
|
||||||
|
Implementation of the method based on the Hellinger Distance y (HDy) proposed by
|
||||||
|
[González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution
|
||||||
|
estimation based on the Hellinger distance. Information Sciences, 218:146–164.](https://www.sciencedirect.com/science/article/pii/S0020025512004069)
|
||||||
|
|
||||||
|
It is implemented in _qp.method.aggregative.HDy_ (also accessible
|
||||||
|
through the alias _qp.method.aggregative.HellingerDistanceY_).
|
||||||
|
This method works with a probabilistic classifier (hard classifiers
|
||||||
|
can be used as well and will be calibrated) and requires a validation
|
||||||
|
set to estimate the parameters of the mixture model. Just like
|
||||||
|
ACC and PACC, this quantifier receives a _val_split_ argument
|
||||||
|
in the constructor (or in the fit method, in which case the previous
|
||||||
|
value is overridden) that can either be a float indicating the proportion
|
||||||
|
of training data to be taken as the validation set (in a random
|
||||||
|
stratified split), or a validation set (i.e., an instance of
|
||||||
|
_LabelledCollection_) itself.
|
||||||
|
|
||||||
|
HDy was proposed as a binary quantifier and the implementation
|
||||||
|
provided in QuaPy accepts only binary datasets.
|
||||||
|
|
||||||
|
The following code shows an example of use:
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
# load a binary dataset
|
||||||
|
dataset = qp.datasets.fetch_reviews('hp', pickle=True)
|
||||||
|
qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)
|
||||||
|
|
||||||
|
model = qp.method.aggregative.HDy(LogisticRegression())
|
||||||
|
model.fit(dataset.training)
|
||||||
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
_New in v0.1.7:_ QuaPy now provides an implementation of the generalized
|
||||||
|
"Distribution Matching" approaches for multiclass, inspired by the framework
|
||||||
|
of [Firat (2016)](https://arxiv.org/abs/1606.00868). One can instantiate
|
||||||
|
a variant of HDy for multiclass quantification as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
mutliclassHDy = qp.method.aggregative.DistributionMatching(classifier=LogisticRegression(), divergence='HD', cdf=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
_New in v0.1.7:_ QuaPy now provides an implementation of the "DyS"
|
||||||
|
framework proposed by [Maletzke et al (2020)](https://ojs.aaai.org/index.php/AAAI/article/view/4376)
|
||||||
|
and the "SMM" method proposed by [Hassan et al (2019)](https://ieeexplore.ieee.org/document/9260028)
|
||||||
|
(thanks to _Pablo González_ for the contributions!)
|
||||||
|
|
||||||
|
### Threshold Optimization methods
|
||||||
|
|
||||||
|
_New in v0.1.7:_ QuaPy now implements Forman's threshold optimization methods;
|
||||||
|
see, e.g., [(Forman 2006)](https://dl.acm.org/doi/abs/10.1145/1150402.1150423)
|
||||||
|
and [(Forman 2008)](https://link.springer.com/article/10.1007/s10618-008-0097-y).
|
||||||
|
These include: T50, MAX, X, Median Sweep (MS), and its variant MS2.
|
||||||
|
|
||||||
|
### Explicit Loss Minimization
|
||||||
|
|
||||||
|
Explicit Loss Minimization (ELM) represents a family of methods
|
||||||
|
based on structured output learning, i.e., quantifiers relying on
|
||||||
|
classifiers that have been optimized targeting a
|
||||||
|
quantification-oriented evaluation measure.
|
||||||
|
The original methods are implemented in QuaPy as classify & count (CC)
|
||||||
|
quantifiers that use Joachims' [SVMperf](https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html)
|
||||||
|
as the underlying classifier, properly set to optimize for the desired loss.
|
||||||
|
|
||||||
|
In QuaPy, this can be achieved by calling the following functions:
|
||||||
|
|
||||||
|
* _newSVMQ_: returns the quantification method called SVM(Q) that optimizes for the metric _Q_ defined
|
||||||
|
in [_Barranquero, J., Díez, J., and del Coz, J. J. (2015). Quantification-oriented learning based
|
||||||
|
on reliable classifiers. Pattern Recognition, 48(2):591–604._](https://www.sciencedirect.com/science/article/pii/S003132031400291X)
|
||||||
|
* _newSVMKLD_ and _newSVMNKLD_: return the quantification methods called SVM(KLD) and SVM(nKLD), standing for
|
||||||
|
Kullback-Leibler Divergence and Normalized Kullback-Leibler Divergence, as proposed in [_Esuli, A. and Sebastiani, F. (2015).
|
||||||
|
Optimizing text quantifiers for multivariate loss functions.
|
||||||
|
ACM Transactions on Knowledge Discovery and Data, 9(4):Article 27._](https://dl.acm.org/doi/abs/10.1145/2700406)
|
||||||
|
* _newSVMAE_ and _newSVMRAE_: return the quantification methods called SVM(AE) and SVM(RAE), which optimize for the (Mean) Absolute Error and for the
|
||||||
|
(Mean) Relative Absolute Error, as first used by
|
||||||
|
[_Moreo, A. and Sebastiani, F. (2021). Tweet sentiment quantification: An experimental re-evaluation. PLOS ONE 17 (9), 1-23._](https://arxiv.org/abs/2011.02552)
|
||||||
|
|
||||||
|
The last two methods (SVM(AE) and SVM(RAE)) have been implemented in
|
||||||
|
QuaPy in order to make available ELM variants for what nowadays
|
||||||
|
are considered the most well-behaved evaluation metrics in quantification.
|
||||||
|
|
||||||
|
In order to make these models work, you would need to run the script
|
||||||
|
_prepare_svmperf.sh_ (distributed along with QuaPy) that
|
||||||
|
downloads _SVMperf_'s source code, applies a patch that
|
||||||
|
implements the quantification oriented losses, and compiles the
|
||||||
|
sources.
|
||||||
|
|
||||||
|
If you want to add any custom loss, you would need to modify
|
||||||
|
the source code of _SVMperf_ in order to implement it, and
|
||||||
|
assign a valid loss code to it. Then you must re-compile
|
||||||
|
the whole thing and instantiate the quantifier in QuaPy
|
||||||
|
as follows:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# you can either set the path to your custom svm_perf_quantification implementation
|
||||||
|
# in the environment variable, or as an argument to the constructor of ELM
|
||||||
|
qp.environ['SVMPERF_HOME'] = './path/to/svm_perf_quantification'
|
||||||
|
|
||||||
|
# assign an alias to your custom loss and the id you have assigned to it
|
||||||
|
svmperf = qp.classification.svmperf.SVMperf
|
||||||
|
svmperf.valid_losses['mycustomloss'] = 28
|
||||||
|
|
||||||
|
# instantiate the ELM method indicating the loss
|
||||||
|
model = qp.method.aggregative.ELM(loss='mycustomloss')
|
||||||
|
```
|
||||||
|
|
||||||
|
All ELM variants are binary quantifiers since they rely on _SVMperf_, which
|
||||||
|
currently supports only binary classification.
|
||||||
|
ELM variants (any binary quantifier in general) can be extended
|
||||||
|
to operate in single-label scenarios trivially by adopting a
|
||||||
|
"one-vs-all" strategy (as, e.g., in
|
||||||
|
[_Gao, W. and Sebastiani, F. (2016). From classification to quantification in tweet sentiment
|
||||||
|
analysis. Social Network Analysis and Mining, 6(19):1–22_](https://link.springer.com/article/10.1007/s13278-016-0327-z)).
|
||||||
|
In QuaPy this is possible by using the _OneVsAll_ class.
|
||||||
|
|
||||||
|
There are two ways for instantiating this class, _OneVsAllGeneric_ that works for
|
||||||
|
any quantifier, and _OneVsAllAggregative_ that is optimized for aggregative quantifiers.
|
||||||
|
In general, you can simply use the _getOneVsAll_ function and QuaPy will choose
|
||||||
|
the more convenient of the two.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
from quapy.method.aggregative import SVMQ
|
||||||
|
|
||||||
|
# load a single-label dataset (this one contains 3 classes)
|
||||||
|
dataset = qp.datasets.fetch_twitter('hcr', pickle=True)
|
||||||
|
|
||||||
|
# let qp know where svmperf is
|
||||||
|
qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
|
||||||
|
|
||||||
|
model = getOneVsAll(SVMQ(), n_jobs=-1) # run them on parallel
|
||||||
|
model.fit(dataset.training)
|
||||||
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
Check the examples _[explicit_loss_minimization.py](../examples/explicit_loss_minimization.py)_
|
||||||
|
and [one_vs_all.py](../examples/one_vs_all.py) for more details.
|
||||||
|
|
||||||
|
## Meta Models
|
||||||
|
|
||||||
|
By _meta_ models we mean quantification methods that are defined on top of other
|
||||||
|
quantification methods, and that thus do not squarely belong to either the aggregative or
|
||||||
|
the non-aggregative group (indeed, _meta_ models could use quantifiers from any of those
|
||||||
|
groups).
|
||||||
|
_Meta_ models are implemented in the _qp.method.meta_ module.
|
||||||
|
|
||||||
|
### Ensembles
|
||||||
|
|
||||||
|
QuaPy implements (some of) the variants proposed in:
|
||||||
|
|
||||||
|
* [_Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).
|
||||||
|
Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.
|
||||||
|
Information Fusion, 34, 87-100._](https://www.sciencedirect.com/science/article/pii/S1566253516300628)
|
||||||
|
* [_Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).
|
||||||
|
Dynamic ensemble selection for quantification tasks.
|
||||||
|
Information Fusion, 45, 1-15._](https://www.sciencedirect.com/science/article/pii/S1566253517303652)
|
||||||
|
|
||||||
|
The following code shows how to instantiate an Ensemble of 30 _Adjusted Classify & Count_ (ACC)
|
||||||
|
quantifiers operating with a _Logistic Regressor_ (LR) as the base classifier, and using the
|
||||||
|
_average_ as the aggregation policy (see the original article for further details).
|
||||||
|
The last parameter indicates to use all processors for parallelization.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
from quapy.method.aggregative import ACC
|
||||||
|
from quapy.method.meta import Ensemble
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
dataset = qp.datasets.fetch_UCIDataset('haberman')
|
||||||
|
|
||||||
|
model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)
|
||||||
|
model.fit(dataset.training)
|
||||||
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
Other aggregation policies implemented in QuaPy include:
|
||||||
|
* 'ptr' for applying a dynamic selection based on the training prevalence of the ensemble's members
|
||||||
|
* 'ds' for applying a dynamic selection based on the Hellinger Distance
|
||||||
|
* _any valid quantification measure_ (e.g., 'mse') for performing a static selection based on
|
||||||
|
the performance estimated for each member of the ensemble in terms of that evaluation metric.
|
||||||
|
|
||||||
|
When using any of the above options, it is important to set the _red_size_ parameter, which
|
||||||
|
informs of the number of members to retain.
|
||||||
|
|
||||||
|
Please, check the [model selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection)
|
||||||
|
wiki if you want to optimize the hyperparameters of the ensemble for classification or quantification.
|
||||||
|
|
||||||
|
### The QuaNet neural network
|
||||||
|
|
||||||
|
QuaPy offers an implementation of QuaNet, a deep learning model presented in:
|
||||||
|
|
||||||
|
[_Esuli, A., Moreo, A., & Sebastiani, F. (2018, October).
|
||||||
|
A recurrent neural network for sentiment quantification.
|
||||||
|
In Proceedings of the 27th ACM International Conference on
|
||||||
|
Information and Knowledge Management (pp. 1775-1778)._](https://dl.acm.org/doi/abs/10.1145/3269206.3269287)
|
||||||
|
|
||||||
|
This model requires _torch_ to be installed.
|
||||||
|
QuaNet also requires a classifier that can provide embedded representations
|
||||||
|
of the inputs.
|
||||||
|
In the original paper, QuaNet was tested using an LSTM as the base classifier.
|
||||||
|
In the following example, we show an instantiation of QuaNet that instead uses CNN as a probabilistic classifier, taking its last layer representation as the document embedding:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import quapy as qp
|
||||||
|
from quapy.method.meta import QuaNet
|
||||||
|
from quapy.classification.neural import NeuralClassifierTrainer, CNNnet
|
||||||
|
|
||||||
|
# use samples of 100 elements
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 100
|
||||||
|
|
||||||
|
# load the kindle dataset as text, and convert words to numerical indexes
|
||||||
|
dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
|
||||||
|
qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
|
||||||
|
|
||||||
|
# the text classifier is a CNN trained by NeuralClassifierTrainer
|
||||||
|
cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
|
||||||
|
learner = NeuralClassifierTrainer(cnn, device='cuda')
|
||||||
|
|
||||||
|
# train QuaNet
|
||||||
|
model = QuaNet(learner, device='cuda')
|
||||||
|
model.fit(dataset.training)
|
||||||
|
estim_prevalence = model.quantify(dataset.test.instances)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
@ -33,18 +33,18 @@ of scenarios exhibiting different degrees of prior
|
||||||
probability shift.
|
probability shift.
|
||||||
|
|
||||||
The class _qp.model_selection.GridSearchQ_ implements a grid-search exploration over the space of
|
The class _qp.model_selection.GridSearchQ_ implements a grid-search exploration over the space of
|
||||||
hyper-parameter combinations that [evaluates](./evaluation)
|
hyper-parameter combinations that [evaluates](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation)
|
||||||
each combination of hyper-parameters by means of a given quantification-oriented
|
each combination of hyper-parameters by means of a given quantification-oriented
|
||||||
error metric (e.g., any of the error functions implemented
|
error metric (e.g., any of the error functions implemented
|
||||||
in _qp.error_) and according to a
|
in _qp.error_) and according to a
|
||||||
[sampling generation protocol](./protocols).
|
[sampling generation protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols).
|
||||||
|
|
||||||
The following is an example (also included in the examples folder) of model selection for quantification:
|
The following is an example (also included in the examples folder) of model selection for quantification:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from quapy.protocol import APP
|
from quapy.protocol import APP
|
||||||
from quapy.method.aggregative import DMy
|
from quapy.method.aggregative import DistributionMatching
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
|
@ -52,7 +52,7 @@ import numpy as np
|
||||||
In this example, we show how to perform model selection on a DistributionMatching quantifier.
|
In this example, we show how to perform model selection on a DistributionMatching quantifier.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model = DMy(LogisticRegression())
|
model = DistributionMatching(LogisticRegression())
|
||||||
|
|
||||||
qp.environ['SAMPLE_SIZE'] = 100
|
qp.environ['SAMPLE_SIZE'] = 100
|
||||||
qp.environ['N_JOBS'] = -1 # explore hyper-parameters in parallel
|
qp.environ['N_JOBS'] = -1 # explore hyper-parameters in parallel
|
||||||
|
|
@ -76,7 +76,7 @@ protocol = APP(validation)
|
||||||
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
# in order to let the quantifier know this hyper-parameter belongs to its underlying
|
||||||
# classifier.
|
# classifier.
|
||||||
param_grid = {
|
param_grid = {
|
||||||
'classifier__C': np.logspace(-3, 3, 7),
|
'classifier__C': np.logspace(-3,3,7),
|
||||||
'nbins': [8, 16, 32, 64],
|
'nbins': [8, 16, 32, 64],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -87,7 +87,7 @@ model = qp.model_selection.GridSearchQ(
|
||||||
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
error='mae', # the error to optimize is the MAE (a quantification-oriented loss)
|
||||||
refit=True, # retrain on the whole labelled set once done
|
refit=True, # retrain on the whole labelled set once done
|
||||||
verbose=True # show information as the process goes on
|
verbose=True # show information as the process goes on
|
||||||
).fit(*training.Xy)
|
).fit(training)
|
||||||
|
|
||||||
print(f'model selection ended: best hyper-parameters={model.best_params_}')
|
print(f'model selection ended: best hyper-parameters={model.best_params_}')
|
||||||
model = model.best_model_
|
model = model.best_model_
|
||||||
|
|
@ -114,6 +114,11 @@ model selection ended: best hyper-parameters={'classifier__C': 100.0, 'nbins': 3
|
||||||
MAE=0.03102
|
MAE=0.03102
|
||||||
```
|
```
|
||||||
|
|
||||||
|
The parameter _val_split_ can alternatively be used to indicate
|
||||||
|
a validation set (i.e., an instance of _LabelledCollection_) instead
|
||||||
|
of a proportion. This could be useful if one wants to have control
|
||||||
|
on the specific data split to be used across different model selection
|
||||||
|
experiments.
|
||||||
|
|
||||||
## Targeting a Classification-oriented loss
|
## Targeting a Classification-oriented loss
|
||||||
|
|
||||||
|
|
@ -133,7 +138,7 @@ learner = GridSearchCV(
|
||||||
LogisticRegression(),
|
LogisticRegression(),
|
||||||
param_grid={'C': np.logspace(-4, 5, 10), 'class_weight': ['balanced', None]},
|
param_grid={'C': np.logspace(-4, 5, 10), 'class_weight': ['balanced', None]},
|
||||||
cv=5)
|
cv=5)
|
||||||
model = DistributionMatching(learner).fit(*dataset.train.Xy)
|
model = DistributionMatching(learner).fit(dataset.training)
|
||||||
```
|
```
|
||||||
|
|
||||||
However, this is conceptually flawed, since the model should be
|
However, this is conceptually flawed, since the model should be
|
||||||
|
|
@ -2,9 +2,6 @@
|
||||||
|
|
||||||
The module _qp.plot_ implements some basic plotting functions
|
The module _qp.plot_ implements some basic plotting functions
|
||||||
that can help analyse the performance of a quantification method.
|
that can help analyse the performance of a quantification method.
|
||||||
See the provided
|
|
||||||
[code example](https://github.com/HLT-ISTI/QuaPy/blob/master/examples/13.plotting.py)
|
|
||||||
for a full example.
|
|
||||||
|
|
||||||
All plotting functions receive as inputs the outcomes of
|
All plotting functions receive as inputs the outcomes of
|
||||||
some experiments and include, for each experiment,
|
some experiments and include, for each experiment,
|
||||||
|
|
@ -46,7 +43,7 @@ quantification methods across different scenarios showcasing
|
||||||
the accuracy of the quantifier in predicting class prevalences
|
the accuracy of the quantifier in predicting class prevalences
|
||||||
for a wide range of prior distributions. This can easily be
|
for a wide range of prior distributions. This can easily be
|
||||||
achieved by means of the
|
achieved by means of the
|
||||||
[artificial sampling protocol](./protocols)
|
[artificial sampling protocol](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols)
|
||||||
that is implemented in QuaPy.
|
that is implemented in QuaPy.
|
||||||
|
|
||||||
The following code shows how to perform one simple experiment
|
The following code shows how to perform one simple experiment
|
||||||
|
|
@ -80,7 +77,7 @@ def gen_data():
|
||||||
method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
|
method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
|
||||||
|
|
||||||
for method_name, model in models():
|
for method_name, model in models():
|
||||||
model.fit(*train.Xy)
|
model.fit(train)
|
||||||
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
|
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
|
||||||
|
|
||||||
method_names.append(method_name)
|
method_names.append(method_name)
|
||||||
|
|
@ -116,7 +113,7 @@ are '.png' or '.pdf'). If this path is not provided, then the plot
|
||||||
will be shown but not saved.
|
will be shown but not saved.
|
||||||
The resulting plot should look like:
|
The resulting plot should look like:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
Note that in this case, we are also indicating the training
|
Note that in this case, we are also indicating the training
|
||||||
prevalence, which is plotted in the diagonal as a cyan dot.
|
prevalence, which is plotted in the diagonal as a cyan dot.
|
||||||
|
|
@ -141,7 +138,7 @@ qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, savepath='./pl
|
||||||
|
|
||||||
and should look like:
|
and should look like:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
The box plots show some interesting facts:
|
The box plots show some interesting facts:
|
||||||
* all methods are biased towards the training prevalence but especially
|
* all methods are biased towards the training prevalence but especially
|
||||||
|
|
@ -174,7 +171,7 @@ def gen_data():
|
||||||
training_size = 5000
|
training_size = 5000
|
||||||
# since the problem is binary, it suffices to specify the negative prevalence, since the positive is constrained
|
# since the problem is binary, it suffices to specify the negative prevalence, since the positive is constrained
|
||||||
train_sample = train.sampling(training_size, 1-training_prevalence)
|
train_sample = train.sampling(training_size, 1-training_prevalence)
|
||||||
model.fit(*train_sample.Xy)
|
model.fit(train_sample)
|
||||||
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
|
true_prev, estim_prev = qp.evaluation.prediction(model, APP(test, repeats=100, random_state=0))
|
||||||
method_name = 'CC$_{'+f'{int(100*training_prevalence)}' + '\%}$'
|
method_name = 'CC$_{'+f'{int(100*training_prevalence)}' + '\%}$'
|
||||||
method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))
|
method_data.append((method_name, true_prev, estim_prev, train_sample.prevalence()))
|
||||||
|
|
@ -184,7 +181,7 @@ def gen_data():
|
||||||
|
|
||||||
and the plot should now look like:
|
and the plot should now look like:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
which clearly shows a negative bias for CC variants trained on
|
which clearly shows a negative bias for CC variants trained on
|
||||||
data containing more negatives (i.e., < 50%) and positive biases
|
data containing more negatives (i.e., < 50%) and positive biases
|
||||||
|
|
@ -198,7 +195,7 @@ To this aim, an argument _nbins_ is passed which indicates
|
||||||
how many isometric subintervals to take. For example
|
how many isometric subintervals to take. For example
|
||||||
the following plot is produced for _nbins=3_:
|
the following plot is produced for _nbins=3_:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display
|
Interestingly enough, the seemingly unbiased estimator (CC at 50%) happens to display
|
||||||
a positive bias (or a tendency to overestimate) in cases of low prevalence
|
a positive bias (or a tendency to overestimate) in cases of low prevalence
|
||||||
|
|
@ -208,7 +205,7 @@ and a negative bias (or a tendency to underestimate) in cases of high prevalence
|
||||||
|
|
||||||
Out of curiosity, the diagonal plot for this experiment looks like:
|
Out of curiosity, the diagonal plot for this experiment looks like:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
showing pretty clearly the dependency of CC on the prior probabilities
|
showing pretty clearly the dependency of CC on the prior probabilities
|
||||||
of the labeled set it was trained on.
|
of the labeled set it was trained on.
|
||||||
|
|
@ -237,7 +234,7 @@ qp.plot.error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs,
|
||||||
error_name='ae', n_bins=10, savepath='./plots/err_drift.png')
|
error_name='ae', n_bins=10, savepath='./plots/err_drift.png')
|
||||||
```
|
```
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
Note that all methods work reasonably well in cases of low prevalence
|
Note that all methods work reasonably well in cases of low prevalence
|
||||||
drift (i.e., any CC-variant is a good quantifier whenever the IID
|
drift (i.e., any CC-variant is a good quantifier whenever the IID
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
.. QuaPy documentation master file, created by
|
||||||
|
sphinx-quickstart on Tue Nov 9 11:31:32 2021.
|
||||||
|
You can adapt this file completely to your liking, but it should at least
|
||||||
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
|
Welcome to QuaPy's documentation!
|
||||||
|
=================================
|
||||||
|
|
||||||
|
QuaPy is an open source framework for Quantification (a.k.a. Supervised Prevalence Estimation)
|
||||||
|
written in Python.
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
QuaPy is rooted in the concept of a data sample, and provides implementations of the most important concepts
|
||||||
|
in quantification literature, such as the most important quantification baselines, many advanced
|
||||||
|
quantification methods, quantification-oriented model selection, many evaluation measures and protocols
|
||||||
|
used for evaluating quantification methods.
|
||||||
|
QuaPy also integrates commonly used datasets and offers visualization tools for facilitating the analysis and
|
||||||
|
interpretation of results.
|
||||||
|
|
||||||
|
A quick example:
|
||||||
|
****************
|
||||||
|
|
||||||
|
The following script fetches a Twitter dataset, trains and evaluates an
|
||||||
|
`Adjusted Classify & Count` model in terms of the `Mean Absolute Error` (MAE)
|
||||||
|
between the class prevalences estimated for the test set and the true prevalences
|
||||||
|
of the test set.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
import quapy as qp
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
|
dataset = qp.datasets.fetch_twitter('semeval16')
|
||||||
|
|
||||||
|
# create an "Adjusted Classify & Count" quantifier
|
||||||
|
model = qp.method.aggregative.ACC(LogisticRegression())
|
||||||
|
model.fit(dataset.training)
|
||||||
|
|
||||||
|
estim_prevalences = model.quantify(dataset.test.instances)
|
||||||
|
true_prevalences = dataset.test.prevalence()
|
||||||
|
|
||||||
|
error = qp.error.mae(true_prevalences, estim_prevalences)
|
||||||
|
|
||||||
|
print(f'Mean Absolute Error (MAE)={error:.3f}')
|
||||||
|
|
||||||
|
|
||||||
|
Quantification is useful in scenarios of prior probability shift. In other
|
||||||
|
words, we would not be interested in estimating the class prevalences of the test set if
|
||||||
|
we could assume the IID assumption to hold, as this prevalence would simply coincide with the
|
||||||
|
class prevalence of the training set. For this reason, any Quantification model
|
||||||
|
should be tested across samples characterized by different class prevalences.
|
||||||
|
QuaPy implements sampling procedures and evaluation protocols that automate this endeavour.
|
||||||
|
See :doc:`Evaluation` for detailed examples.
|
||||||
|
|
||||||
|
Features
|
||||||
|
********
|
||||||
|
|
||||||
|
* Implementation of most popular quantification methods (Classify-&-Count variants, Expectation-Maximization, SVM-based variants for quantification, HDy, QuaNet, and Ensembles).
|
||||||
|
* Versatile functionality for performing evaluation based on artificial sampling protocols.
|
||||||
|
* Implementation of most commonly used evaluation metrics (e.g., MAE, MRAE, MSE, NKLD, etc.).
|
||||||
|
* Popular datasets for Quantification (textual and numeric) available, including:
|
||||||
|
* 32 UCI Machine Learning datasets.
|
||||||
|
* 11 Twitter Sentiment datasets.
|
||||||
|
* 3 Reviews Sentiment datasets.
|
||||||
|
* 4 tasks from LeQua competition (*new in v0.1.7!*)
|
||||||
|
* Native support for binary and single-label scenarios of quantification.
|
||||||
|
* Model selection functionality targeting quantification-oriented losses.
|
||||||
|
* Visualization tools for analysing results.
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
:caption: Contents:
|
||||||
|
|
||||||
|
Installation
|
||||||
|
Datasets
|
||||||
|
Evaluation
|
||||||
|
Protocols
|
||||||
|
Methods
|
||||||
|
Model-Selection
|
||||||
|
Plotting
|
||||||
|
API Developers documentation<modules>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
||||||
|
|
@ -1,35 +1,38 @@
|
||||||
|
:tocdepth: 2
|
||||||
|
|
||||||
quapy.classification package
|
quapy.classification package
|
||||||
============================
|
============================
|
||||||
|
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
quapy.classification.calibration module
|
quapy.classification.calibration
|
||||||
---------------------------------------
|
--------------------------------
|
||||||
|
|
||||||
|
.. versionadded:: 0.1.7
|
||||||
.. automodule:: quapy.classification.calibration
|
.. automodule:: quapy.classification.calibration
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.classification.methods module
|
quapy.classification.methods
|
||||||
-----------------------------------
|
----------------------------
|
||||||
|
|
||||||
.. automodule:: quapy.classification.methods
|
.. automodule:: quapy.classification.methods
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.classification.neural module
|
quapy.classification.neural
|
||||||
----------------------------------
|
---------------------------
|
||||||
|
|
||||||
.. automodule:: quapy.classification.neural
|
.. automodule:: quapy.classification.neural
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.classification.svmperf module
|
quapy.classification.svmperf
|
||||||
-----------------------------------
|
----------------------------
|
||||||
|
|
||||||
.. automodule:: quapy.classification.svmperf
|
.. automodule:: quapy.classification.svmperf
|
||||||
:members:
|
:members:
|
||||||
|
|
@ -1,36 +1,37 @@
|
||||||
|
:tocdepth: 2
|
||||||
|
|
||||||
quapy.data package
|
quapy.data package
|
||||||
==================
|
==================
|
||||||
|
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
quapy.data.base module
|
quapy.data.base
|
||||||
----------------------
|
---------------
|
||||||
|
|
||||||
.. automodule:: quapy.data.base
|
.. automodule:: quapy.data.base
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.data.datasets module
|
quapy.data.datasets
|
||||||
--------------------------
|
-------------------
|
||||||
|
|
||||||
.. automodule:: quapy.data.datasets
|
.. automodule:: quapy.data.datasets
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
quapy.data.preprocessing
|
||||||
quapy.data.preprocessing module
|
------------------------
|
||||||
-------------------------------
|
|
||||||
|
|
||||||
.. automodule:: quapy.data.preprocessing
|
.. automodule:: quapy.data.preprocessing
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.data.reader module
|
quapy.data.reader
|
||||||
------------------------
|
-----------------
|
||||||
|
|
||||||
.. automodule:: quapy.data.reader
|
.. automodule:: quapy.data.reader
|
||||||
:members:
|
:members:
|
||||||
|
|
@ -1,61 +1,47 @@
|
||||||
|
:tocdepth: 2
|
||||||
|
|
||||||
quapy.method package
|
quapy.method package
|
||||||
====================
|
====================
|
||||||
|
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
quapy.method.aggregative module
|
quapy.method.aggregative
|
||||||
-------------------------------
|
------------------------
|
||||||
|
|
||||||
.. automodule:: quapy.method.aggregative
|
.. automodule:: quapy.method.aggregative
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
.. automodule:: quapy.method._kdey
|
quapy.method.base
|
||||||
:members:
|
-----------------
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
.. automodule:: quapy.method._neural
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
.. automodule:: quapy.method._threshold_optim
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
|
|
||||||
quapy.method.base module
|
|
||||||
------------------------
|
|
||||||
|
|
||||||
.. automodule:: quapy.method.base
|
.. automodule:: quapy.method.base
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.method.meta module
|
quapy.method.meta
|
||||||
------------------------
|
-----------------
|
||||||
|
|
||||||
.. automodule:: quapy.method.meta
|
.. automodule:: quapy.method.meta
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.method.non\_aggregative module
|
quapy.method.neural
|
||||||
------------------------------------
|
-------------------
|
||||||
|
|
||||||
.. automodule:: quapy.method.non_aggregative
|
.. automodule:: quapy.method.neural
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.method.composable module
|
quapy.method.non\_aggregative
|
||||||
------------------------------
|
-----------------------------
|
||||||
|
|
||||||
.. automodule:: quapy.method.composable
|
.. automodule:: quapy.method.non_aggregative
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
@ -1,76 +1,79 @@
|
||||||
|
:tocdepth: 2
|
||||||
|
|
||||||
quapy package
|
quapy package
|
||||||
=============
|
=============
|
||||||
|
|
||||||
Subpackages
|
|
||||||
-----------
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 4
|
|
||||||
|
|
||||||
quapy.classification
|
|
||||||
quapy.data
|
|
||||||
quapy.method
|
|
||||||
|
|
||||||
|
|
||||||
Submodules
|
Submodules
|
||||||
----------
|
----------
|
||||||
|
|
||||||
quapy.error module
|
quapy.error
|
||||||
------------------
|
-----------
|
||||||
|
|
||||||
.. automodule:: quapy.error
|
.. automodule:: quapy.error
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.evaluation module
|
quapy.evaluation
|
||||||
-----------------------
|
----------------
|
||||||
|
|
||||||
.. automodule:: quapy.evaluation
|
.. automodule:: quapy.evaluation
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.functional module
|
quapy.protocol
|
||||||
-----------------------
|
--------------
|
||||||
|
|
||||||
|
.. versionadded:: 0.1.7
|
||||||
|
.. automodule:: quapy.protocol
|
||||||
|
:members:
|
||||||
|
:undoc-members:
|
||||||
|
:show-inheritance:
|
||||||
|
|
||||||
|
quapy.functional
|
||||||
|
----------------
|
||||||
|
|
||||||
.. automodule:: quapy.functional
|
.. automodule:: quapy.functional
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.model\_selection module
|
quapy.model\_selection
|
||||||
-----------------------------
|
----------------------
|
||||||
|
|
||||||
.. automodule:: quapy.model_selection
|
.. automodule:: quapy.model_selection
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.plot module
|
quapy.plot
|
||||||
-----------------
|
----------
|
||||||
|
|
||||||
.. automodule:: quapy.plot
|
.. automodule:: quapy.plot
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
quapy.protocol module
|
quapy.util
|
||||||
---------------------
|
----------
|
||||||
|
|
||||||
.. automodule:: quapy.protocol
|
|
||||||
:members:
|
|
||||||
:undoc-members:
|
|
||||||
:show-inheritance:
|
|
||||||
|
|
||||||
quapy.util module
|
|
||||||
-----------------
|
|
||||||
|
|
||||||
.. automodule:: quapy.util
|
.. automodule:: quapy.util
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
Subpackages
|
||||||
|
-----------
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 3
|
||||||
|
|
||||||
|
quapy.classification
|
||||||
|
quapy.data
|
||||||
|
quapy.method
|
||||||
|
|
||||||
|
|
||||||
Module contents
|
Module contents
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
|
@ -78,3 +81,4 @@ Module contents
|
||||||
:members:
|
:members:
|
||||||
:undoc-members:
|
:undoc-members:
|
||||||
:show-inheritance:
|
:show-inheritance:
|
||||||
|
|
||||||
|
|
@ -1,134 +0,0 @@
|
||||||
/*
|
|
||||||
* _sphinx_javascript_frameworks_compat.js
|
|
||||||
* ~~~~~~~~~~
|
|
||||||
*
|
|
||||||
* Compatibility shim for jQuery and underscore.js.
|
|
||||||
*
|
|
||||||
* WILL BE REMOVED IN Sphinx 6.0
|
|
||||||
* xref RemovedInSphinx60Warning
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* select a different prefix for underscore
|
|
||||||
*/
|
|
||||||
$u = _.noConflict();
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* small helper function to urldecode strings
|
|
||||||
*
|
|
||||||
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
|
|
||||||
*/
|
|
||||||
jQuery.urldecode = function(x) {
|
|
||||||
if (!x) {
|
|
||||||
return x
|
|
||||||
}
|
|
||||||
return decodeURIComponent(x.replace(/\+/g, ' '));
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* small helper function to urlencode strings
|
|
||||||
*/
|
|
||||||
jQuery.urlencode = encodeURIComponent;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This function returns the parsed url parameters of the
|
|
||||||
* current request. Multiple values per key are supported,
|
|
||||||
* it will always return arrays of strings for the value parts.
|
|
||||||
*/
|
|
||||||
jQuery.getQueryParameters = function(s) {
|
|
||||||
if (typeof s === 'undefined')
|
|
||||||
s = document.location.search;
|
|
||||||
var parts = s.substr(s.indexOf('?') + 1).split('&');
|
|
||||||
var result = {};
|
|
||||||
for (var i = 0; i < parts.length; i++) {
|
|
||||||
var tmp = parts[i].split('=', 2);
|
|
||||||
var key = jQuery.urldecode(tmp[0]);
|
|
||||||
var value = jQuery.urldecode(tmp[1]);
|
|
||||||
if (key in result)
|
|
||||||
result[key].push(value);
|
|
||||||
else
|
|
||||||
result[key] = [value];
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* highlight a given string on a jquery object by wrapping it in
|
|
||||||
* span elements with the given class name.
|
|
||||||
*/
|
|
||||||
jQuery.fn.highlightText = function(text, className) {
|
|
||||||
function highlight(node, addItems) {
|
|
||||||
if (node.nodeType === 3) {
|
|
||||||
var val = node.nodeValue;
|
|
||||||
var pos = val.toLowerCase().indexOf(text);
|
|
||||||
if (pos >= 0 &&
|
|
||||||
!jQuery(node.parentNode).hasClass(className) &&
|
|
||||||
!jQuery(node.parentNode).hasClass("nohighlight")) {
|
|
||||||
var span;
|
|
||||||
var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
|
|
||||||
if (isInSVG) {
|
|
||||||
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
|
|
||||||
} else {
|
|
||||||
span = document.createElement("span");
|
|
||||||
span.className = className;
|
|
||||||
}
|
|
||||||
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
|
|
||||||
node.parentNode.insertBefore(span, node.parentNode.insertBefore(
|
|
||||||
document.createTextNode(val.substr(pos + text.length)),
|
|
||||||
node.nextSibling));
|
|
||||||
node.nodeValue = val.substr(0, pos);
|
|
||||||
if (isInSVG) {
|
|
||||||
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
|
|
||||||
var bbox = node.parentElement.getBBox();
|
|
||||||
rect.x.baseVal.value = bbox.x;
|
|
||||||
rect.y.baseVal.value = bbox.y;
|
|
||||||
rect.width.baseVal.value = bbox.width;
|
|
||||||
rect.height.baseVal.value = bbox.height;
|
|
||||||
rect.setAttribute('class', className);
|
|
||||||
addItems.push({
|
|
||||||
"parent": node.parentNode,
|
|
||||||
"target": rect});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (!jQuery(node).is("button, select, textarea")) {
|
|
||||||
jQuery.each(node.childNodes, function() {
|
|
||||||
highlight(this, addItems);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
var addItems = [];
|
|
||||||
var result = this.each(function() {
|
|
||||||
highlight(this, addItems);
|
|
||||||
});
|
|
||||||
for (var i = 0; i < addItems.length; ++i) {
|
|
||||||
jQuery(addItems[i].parent).before(addItems[i].target);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
|
||||||
* backward compatibility for jQuery.browser
|
|
||||||
* This will be supported until firefox bug is fixed.
|
|
||||||
*/
|
|
||||||
if (!jQuery.browser) {
|
|
||||||
jQuery.uaMatch = function(ua) {
|
|
||||||
ua = ua.toLowerCase();
|
|
||||||
|
|
||||||
var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
|
|
||||||
/(webkit)[ \/]([\w.]+)/.exec(ua) ||
|
|
||||||
/(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
|
|
||||||
/(msie) ([\w.]+)/.exec(ua) ||
|
|
||||||
ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
|
|
||||||
[];
|
|
||||||
|
|
||||||
return {
|
|
||||||
browser: match[ 1 ] || "",
|
|
||||||
version: match[ 2 ] || "0"
|
|
||||||
};
|
|
||||||
};
|
|
||||||
jQuery.browser = {};
|
|
||||||
jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
|
|
||||||
}
|
|
||||||
|
After Width: | Height: | Size: 78 B |
|
|
@ -0,0 +1,900 @@
|
||||||
|
/*
|
||||||
|
* basic.css
|
||||||
|
* ~~~~~~~~~
|
||||||
|
*
|
||||||
|
* Sphinx stylesheet -- basic theme.
|
||||||
|
*
|
||||||
|
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||||
|
* :license: BSD, see LICENSE for details.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* -- main layout ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.clearer {
|
||||||
|
clear: both;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.section::after {
|
||||||
|
display: block;
|
||||||
|
content: '';
|
||||||
|
clear: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- relbar ---------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.related {
|
||||||
|
width: 100%;
|
||||||
|
font-size: 90%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related h3 {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0 0 0 10px;
|
||||||
|
list-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related li {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related li.right {
|
||||||
|
float: right;
|
||||||
|
margin-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- sidebar --------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.sphinxsidebarwrapper {
|
||||||
|
padding: 10px 5px 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar {
|
||||||
|
float: left;
|
||||||
|
width: 210px;
|
||||||
|
margin-left: -100%;
|
||||||
|
font-size: 90%;
|
||||||
|
word-wrap: break-word;
|
||||||
|
overflow-wrap : break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul {
|
||||||
|
list-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul ul,
|
||||||
|
div.sphinxsidebar ul.want-points {
|
||||||
|
margin-left: 20px;
|
||||||
|
list-style: square;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul ul {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar form {
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar input {
|
||||||
|
border: 1px solid #98dbcc;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox form.search {
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox input[type="text"] {
|
||||||
|
float: left;
|
||||||
|
width: 80%;
|
||||||
|
padding: 0.25em;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar #searchbox input[type="submit"] {
|
||||||
|
float: left;
|
||||||
|
width: 20%;
|
||||||
|
border-left: none;
|
||||||
|
padding: 0.25em;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
img {
|
||||||
|
border: 0;
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- search page ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
ul.search {
|
||||||
|
margin: 10px 0 0 20px;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li {
|
||||||
|
padding: 5px 0 5px 20px;
|
||||||
|
background-image: url(file.png);
|
||||||
|
background-repeat: no-repeat;
|
||||||
|
background-position: 0 7px;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li a {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.search li p.context {
|
||||||
|
color: #888;
|
||||||
|
margin: 2px 0 0 30px;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
ul.keywordmatches li.goodmatch a {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- index page ------------------------------------------------------------ */
|
||||||
|
|
||||||
|
table.contentstable {
|
||||||
|
width: 90%;
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.contentstable p.biglink {
|
||||||
|
line-height: 150%;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.biglink {
|
||||||
|
font-size: 1.3em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span.linkdescr {
|
||||||
|
font-style: italic;
|
||||||
|
padding-top: 5px;
|
||||||
|
font-size: 90%;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- general index --------------------------------------------------------- */
|
||||||
|
|
||||||
|
table.indextable {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable td {
|
||||||
|
text-align: left;
|
||||||
|
vertical-align: top;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable ul {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
list-style-type: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable > tbody > tr > td > ul {
|
||||||
|
padding-left: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable tr.pcap {
|
||||||
|
height: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.indextable tr.cap {
|
||||||
|
margin-top: 10px;
|
||||||
|
background-color: #f2f2f2;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.toggler {
|
||||||
|
margin-right: 3px;
|
||||||
|
margin-top: 3px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.modindex-jumpbox {
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
margin: 1em 0 1em 0;
|
||||||
|
padding: 0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.genindex-jumpbox {
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
margin: 1em 0 1em 0;
|
||||||
|
padding: 0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- domain module index --------------------------------------------------- */
|
||||||
|
|
||||||
|
table.modindextable td {
|
||||||
|
padding: 2px;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- general body styles --------------------------------------------------- */
|
||||||
|
|
||||||
|
div.body {
|
||||||
|
min-width: 360px;
|
||||||
|
max-width: 800px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body p, div.body dd, div.body li, div.body blockquote {
|
||||||
|
-moz-hyphens: auto;
|
||||||
|
-ms-hyphens: auto;
|
||||||
|
-webkit-hyphens: auto;
|
||||||
|
hyphens: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.headerlink {
|
||||||
|
visibility: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1:hover > a.headerlink,
|
||||||
|
h2:hover > a.headerlink,
|
||||||
|
h3:hover > a.headerlink,
|
||||||
|
h4:hover > a.headerlink,
|
||||||
|
h5:hover > a.headerlink,
|
||||||
|
h6:hover > a.headerlink,
|
||||||
|
dt:hover > a.headerlink,
|
||||||
|
caption:hover > a.headerlink,
|
||||||
|
p.caption:hover > a.headerlink,
|
||||||
|
div.code-block-caption:hover > a.headerlink {
|
||||||
|
visibility: visible;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body p.caption {
|
||||||
|
text-align: inherit;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body td {
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
.first {
|
||||||
|
margin-top: 0 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
p.rubric {
|
||||||
|
margin-top: 30px;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.align-left, figure.align-left, .figure.align-left, object.align-left {
|
||||||
|
clear: left;
|
||||||
|
float: left;
|
||||||
|
margin-right: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.align-right, figure.align-right, .figure.align-right, object.align-right {
|
||||||
|
clear: right;
|
||||||
|
float: right;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.align-center, figure.align-center, .figure.align-center, object.align-center {
|
||||||
|
display: block;
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.align-default, figure.align-default, .figure.align-default {
|
||||||
|
display: block;
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.align-left {
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
.align-center {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.align-default {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.align-right {
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- sidebars -------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.sidebar,
|
||||||
|
aside.sidebar {
|
||||||
|
margin: 0 0 0.5em 1em;
|
||||||
|
border: 1px solid #ddb;
|
||||||
|
padding: 7px;
|
||||||
|
background-color: #ffe;
|
||||||
|
width: 40%;
|
||||||
|
float: right;
|
||||||
|
clear: right;
|
||||||
|
overflow-x: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
p.sidebar-title {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
nav.contents,
|
||||||
|
aside.topic,
|
||||||
|
div.admonition, div.topic, blockquote {
|
||||||
|
clear: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- topics ---------------------------------------------------------------- */
|
||||||
|
nav.contents,
|
||||||
|
aside.topic,
|
||||||
|
div.topic {
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
padding: 7px;
|
||||||
|
margin: 10px 0 10px 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
p.topic-title {
|
||||||
|
font-size: 1.1em;
|
||||||
|
font-weight: bold;
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- admonitions ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.admonition {
|
||||||
|
margin-top: 10px;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
padding: 7px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition dt {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
p.admonition-title {
|
||||||
|
margin: 0px 10px 5px 0px;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body p.centered {
|
||||||
|
text-align: center;
|
||||||
|
margin-top: 25px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- content of sidebars/topics/admonitions -------------------------------- */
|
||||||
|
|
||||||
|
div.sidebar > :last-child,
|
||||||
|
aside.sidebar > :last-child,
|
||||||
|
nav.contents > :last-child,
|
||||||
|
aside.topic > :last-child,
|
||||||
|
div.topic > :last-child,
|
||||||
|
div.admonition > :last-child {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sidebar::after,
|
||||||
|
aside.sidebar::after,
|
||||||
|
nav.contents::after,
|
||||||
|
aside.topic::after,
|
||||||
|
div.topic::after,
|
||||||
|
div.admonition::after,
|
||||||
|
blockquote::after {
|
||||||
|
display: block;
|
||||||
|
content: '';
|
||||||
|
clear: both;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- tables ---------------------------------------------------------------- */
|
||||||
|
|
||||||
|
table.docutils {
|
||||||
|
margin-top: 10px;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
border: 0;
|
||||||
|
border-collapse: collapse;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.align-center {
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.align-default {
|
||||||
|
margin-left: auto;
|
||||||
|
margin-right: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
table caption span.caption-number {
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
table caption span.caption-text {
|
||||||
|
}
|
||||||
|
|
||||||
|
table.docutils td, table.docutils th {
|
||||||
|
padding: 1px 8px 1px 5px;
|
||||||
|
border-top: 0;
|
||||||
|
border-left: 0;
|
||||||
|
border-right: 0;
|
||||||
|
border-bottom: 1px solid #aaa;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
text-align: left;
|
||||||
|
padding-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.citation {
|
||||||
|
border-left: solid 1px gray;
|
||||||
|
margin-left: 1px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.citation td {
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
th > :first-child,
|
||||||
|
td > :first-child {
|
||||||
|
margin-top: 0px;
|
||||||
|
}
|
||||||
|
|
||||||
|
th > :last-child,
|
||||||
|
td > :last-child {
|
||||||
|
margin-bottom: 0px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- figures --------------------------------------------------------------- */
|
||||||
|
|
||||||
|
div.figure, figure {
|
||||||
|
margin: 0.5em;
|
||||||
|
padding: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.figure p.caption, figcaption {
|
||||||
|
padding: 0.3em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.figure p.caption span.caption-number,
|
||||||
|
figcaption span.caption-number {
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.figure p.caption span.caption-text,
|
||||||
|
figcaption span.caption-text {
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- field list styles ----------------------------------------------------- */
|
||||||
|
|
||||||
|
table.field-list td, table.field-list th {
|
||||||
|
border: 0 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.field-list ul {
|
||||||
|
margin: 0;
|
||||||
|
padding-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.field-list p {
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.field-name {
|
||||||
|
-moz-hyphens: manual;
|
||||||
|
-ms-hyphens: manual;
|
||||||
|
-webkit-hyphens: manual;
|
||||||
|
hyphens: manual;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- hlist styles ---------------------------------------------------------- */
|
||||||
|
|
||||||
|
table.hlist {
|
||||||
|
margin: 1em 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.hlist td {
|
||||||
|
vertical-align: top;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- object description styles --------------------------------------------- */
|
||||||
|
|
||||||
|
.sig {
|
||||||
|
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig-name, code.descname {
|
||||||
|
background-color: transparent;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig-name {
|
||||||
|
font-size: 1.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
code.descname {
|
||||||
|
font-size: 1.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig-prename, code.descclassname {
|
||||||
|
background-color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.optional {
|
||||||
|
font-size: 1.3em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig-paren {
|
||||||
|
font-size: larger;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig-param.n {
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* C++ specific styling */
|
||||||
|
|
||||||
|
.sig-inline.c-texpr,
|
||||||
|
.sig-inline.cpp-texpr {
|
||||||
|
font-family: unset;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig.c .k, .sig.c .kt,
|
||||||
|
.sig.cpp .k, .sig.cpp .kt {
|
||||||
|
color: #0033B3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig.c .m,
|
||||||
|
.sig.cpp .m {
|
||||||
|
color: #1750EB;
|
||||||
|
}
|
||||||
|
|
||||||
|
.sig.c .s, .sig.c .sc,
|
||||||
|
.sig.cpp .s, .sig.cpp .sc {
|
||||||
|
color: #067D17;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* -- other body styles ----------------------------------------------------- */
|
||||||
|
|
||||||
|
ol.arabic {
|
||||||
|
list-style: decimal;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.loweralpha {
|
||||||
|
list-style: lower-alpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.upperalpha {
|
||||||
|
list-style: upper-alpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.lowerroman {
|
||||||
|
list-style: lower-roman;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.upperroman {
|
||||||
|
list-style: upper-roman;
|
||||||
|
}
|
||||||
|
|
||||||
|
:not(li) > ol > li:first-child > :first-child,
|
||||||
|
:not(li) > ul > li:first-child > :first-child {
|
||||||
|
margin-top: 0px;
|
||||||
|
}
|
||||||
|
|
||||||
|
:not(li) > ol > li:last-child > :last-child,
|
||||||
|
:not(li) > ul > li:last-child > :last-child {
|
||||||
|
margin-bottom: 0px;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.simple ol p,
|
||||||
|
ol.simple ul p,
|
||||||
|
ul.simple ol p,
|
||||||
|
ul.simple ul p {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.simple > li:not(:first-child) > p,
|
||||||
|
ul.simple > li:not(:first-child) > p {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ol.simple p,
|
||||||
|
ul.simple p {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
aside.footnote > span,
|
||||||
|
div.citation > span {
|
||||||
|
float: left;
|
||||||
|
}
|
||||||
|
aside.footnote > span:last-of-type,
|
||||||
|
div.citation > span:last-of-type {
|
||||||
|
padding-right: 0.5em;
|
||||||
|
}
|
||||||
|
aside.footnote > p {
|
||||||
|
margin-left: 2em;
|
||||||
|
}
|
||||||
|
div.citation > p {
|
||||||
|
margin-left: 4em;
|
||||||
|
}
|
||||||
|
aside.footnote > p:last-of-type,
|
||||||
|
div.citation > p:last-of-type {
|
||||||
|
margin-bottom: 0em;
|
||||||
|
}
|
||||||
|
/* Clearfix after the last paragraph of a footnote/citation, so the entry
   extends past the left-floated spans that precede it. */
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
    display: block; /* `clear` only applies to block-level boxes */
    content: "";
    clear: both;
}
|
||||||
|
|
||||||
|
dl.field-list {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: fit-content(30%) auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.field-list > dt {
|
||||||
|
font-weight: bold;
|
||||||
|
word-break: break-word;
|
||||||
|
padding-left: 0.5em;
|
||||||
|
padding-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.field-list > dd {
|
||||||
|
padding-left: 0.5em;
|
||||||
|
margin-top: 0em;
|
||||||
|
margin-left: 0em;
|
||||||
|
margin-bottom: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl {
|
||||||
|
margin-bottom: 15px;
|
||||||
|
}
|
||||||
|
|
||||||
|
dd > :first-child {
|
||||||
|
margin-top: 0px;
|
||||||
|
}
|
||||||
|
|
||||||
|
dd ul, dd table {
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
dd {
|
||||||
|
margin-top: 3px;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
margin-left: 30px;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl > dd:last-child,
|
||||||
|
dl > dd:last-child > :last-child {
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
dt:target, span.highlighted {
|
||||||
|
background-color: #fbe54e;
|
||||||
|
}
|
||||||
|
|
||||||
|
rect.highlighted {
|
||||||
|
fill: #fbe54e;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.glossary dt {
|
||||||
|
font-weight: bold;
|
||||||
|
font-size: 1.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.versionmodified {
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
.system-message {
|
||||||
|
background-color: #fda;
|
||||||
|
padding: 5px;
|
||||||
|
border: 3px solid red;
|
||||||
|
}
|
||||||
|
|
||||||
|
.footnote:target {
|
||||||
|
background-color: #ffa;
|
||||||
|
}
|
||||||
|
|
||||||
|
.line-block {
|
||||||
|
display: block;
|
||||||
|
margin-top: 1em;
|
||||||
|
margin-bottom: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.line-block .line-block {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
margin-left: 1.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.guilabel, .menuselection {
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
.accelerator {
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
.classifier {
|
||||||
|
font-style: oblique;
|
||||||
|
}
|
||||||
|
|
||||||
|
.classifier:before {
|
||||||
|
font-style: normal;
|
||||||
|
margin: 0 0.5em;
|
||||||
|
content: ":";
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
|
||||||
|
abbr, acronym {
|
||||||
|
border-bottom: dotted 1px;
|
||||||
|
cursor: help;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- code displays --------------------------------------------------------- */
|
||||||
|
|
||||||
|
pre {
|
||||||
|
overflow: auto;
|
||||||
|
overflow-y: hidden; /* fixes display issues on Chrome browsers */
|
||||||
|
}
|
||||||
|
|
||||||
|
pre, div[class*="highlight-"] {
|
||||||
|
clear: both;
|
||||||
|
}
|
||||||
|
|
||||||
|
span.pre {
|
||||||
|
-moz-hyphens: none;
|
||||||
|
-ms-hyphens: none;
|
||||||
|
-webkit-hyphens: none;
|
||||||
|
hyphens: none;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
div[class*="highlight-"] {
|
||||||
|
margin: 1em 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
td.linenos pre {
|
||||||
|
border: 0;
|
||||||
|
background-color: transparent;
|
||||||
|
color: #aaa;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable tbody {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable tr {
|
||||||
|
display: flex;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable td {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable td.linenos {
|
||||||
|
padding-right: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.highlighttable td.code {
|
||||||
|
flex: 1;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
|
.highlight .hll {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.highlight pre,
|
||||||
|
table.highlighttable pre {
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.code-block-caption + div {
|
||||||
|
margin-top: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.code-block-caption {
|
||||||
|
margin-top: 1em;
|
||||||
|
padding: 2px 5px;
|
||||||
|
font-size: small;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.code-block-caption code {
|
||||||
|
background-color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make line numbers and prompts unselectable. */
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp {  /* gp: Generic.Prompt */
    /* Vendor-prefixed forms first, standard property last so it takes
       precedence wherever it is supported. The former
       `-webkit-user-select: text` was immediately overridden by the `none`
       declaration that followed it and has been dropped. */
    -webkit-user-select: none; /* Chrome/Safari */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* IE10+ */
    user-select: none;
}
|
||||||
|
|
||||||
|
div.code-block-caption span.caption-number {
|
||||||
|
padding: 0.1em 0.3em;
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.code-block-caption span.caption-text {
|
||||||
|
}
|
||||||
|
|
||||||
|
div.literal-block-wrapper {
|
||||||
|
margin: 1em 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
code.xref, a code {
|
||||||
|
background-color: transparent;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
|
||||||
|
background-color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewcode-link {
|
||||||
|
float: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewcode-back {
|
||||||
|
float: right;
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.viewcode-block:target {
|
||||||
|
margin: -1px -10px;
|
||||||
|
padding: 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- math display ---------------------------------------------------------- */
|
||||||
|
|
||||||
|
img.math {
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body div.math p {
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
span.eqno {
|
||||||
|
float: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
span.eqno a.headerlink {
|
||||||
|
position: absolute;
|
||||||
|
z-index: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.math:hover a.headerlink {
|
||||||
|
visibility: visible;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- printout stylesheet --------------------------------------------------- */
|
||||||
|
|
||||||
|
@media print {
|
||||||
|
div.document,
|
||||||
|
div.documentwrapper,
|
||||||
|
div.bodywrapper {
|
||||||
|
margin: 0 !important;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar,
|
||||||
|
div.related,
|
||||||
|
div.footer,
|
||||||
|
#top-link {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,508 @@
|
||||||
|
/*
|
||||||
|
* bizstyle.css_t
|
||||||
|
* ~~~~~~~~~~~~~~
|
||||||
|
*
|
||||||
|
* Sphinx stylesheet -- business style theme.
|
||||||
|
*
|
||||||
|
* :copyright: Copyright 2011-2014 by Sphinx team, see AUTHORS.
|
||||||
|
* :license: BSD, see LICENSE for details.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
@import url("basic.css");
|
||||||
|
|
||||||
|
/* -- page layout ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
|
||||||
|
'Verdana', sans-serif;
|
||||||
|
font-size: 14px;
|
||||||
|
letter-spacing: -0.01em;
|
||||||
|
line-height: 150%;
|
||||||
|
text-align: center;
|
||||||
|
background-color: white;
|
||||||
|
background-image: url(background_b01.png);
|
||||||
|
color: black;
|
||||||
|
padding: 0;
|
||||||
|
border-right: 1px solid #336699;
|
||||||
|
border-left: 1px solid #336699;
|
||||||
|
|
||||||
|
margin: 0px 40px 0px 40px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Main document container. */
div.document {
    background-color: white;
    text-align: left;
    background-repeat: repeat-x;

    /* Prefixed forms kept as legacy fallbacks; the standard property comes
       last so it is the one current browsers apply. */
    -moz-box-shadow: 2px 2px 5px #000;
    -webkit-box-shadow: 2px 2px 5px #000;
    box-shadow: 2px 2px 5px #000;
}
|
||||||
|
|
||||||
|
div.documentwrapper {
|
||||||
|
float: left;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.bodywrapper {
|
||||||
|
margin: 0 0 0 240px;
|
||||||
|
border-left: 1px solid #ccc;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0.5em 20px 20px 20px;
|
||||||
|
}
|
||||||
|
div.bodywrapper {
|
||||||
|
margin: 0 0 0 calc(210px + 30px);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Navigation ("related") bar. */
div.related {
    font-size: 1em;

    /* Prefixed forms kept as legacy fallbacks; the standard property comes
       last so it is the one current browsers apply. */
    -moz-box-shadow: 2px 2px 5px #000;
    -webkit-box-shadow: 2px 2px 5px #000;
    box-shadow: 2px 2px 5px #000;
}
|
||||||
|
|
||||||
|
div.related ul {
|
||||||
|
background-color: #336699;
|
||||||
|
height: 100%;
|
||||||
|
overflow: hidden;
|
||||||
|
border-top: 1px solid #ddd;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul li {
|
||||||
|
color: white;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
height: 2em;
|
||||||
|
float: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul li.right {
|
||||||
|
float: right;
|
||||||
|
margin-right: 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul li a {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0 5px 0 5px;
|
||||||
|
line-height: 1.75em;
|
||||||
|
color: #fff;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.related ul li a:hover {
|
||||||
|
color: #fff;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebarwrapper {
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar {
|
||||||
|
padding: 0.5em 12px 12px 12px;
|
||||||
|
width: 210px;
|
||||||
|
font-size: 1em;
|
||||||
|
text-align: left;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar h3, div.sphinxsidebar h4 {
|
||||||
|
margin: 1em 0 0.5em 0;
|
||||||
|
font-size: 1em;
|
||||||
|
padding: 0.1em 0 0.1em 0.5em;
|
||||||
|
color: white;
|
||||||
|
border: 1px solid #336699;
|
||||||
|
background-color: #336699;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar h3 a {
|
||||||
|
color: white;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lists inside the sidebar. */
div.sphinxsidebar ul {
    /* A `padding-left: 1.5em` declaration used to precede the `padding`
       shorthand below, which reset it to 0; the dead declaration has been
       removed, so the computed style is unchanged. */
    margin-top: 7px;
    padding: 0;
    line-height: 130%;
}
|
||||||
|
|
||||||
|
div.sphinxsidebar ul ul {
|
||||||
|
margin-left: 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.sphinxsidebar input {
|
||||||
|
border: 1px solid #336699;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Page footer. */
div.footer {
    background-color: white;
    color: #336699;
    padding: 3px 8px 3px 0;
    clear: both;
    font-size: 0.8em;
    text-align: right;
    border-bottom: 1px solid #336699;

    /* Prefixed forms kept as legacy fallbacks; the standard property comes
       last so it is the one current browsers apply. */
    -moz-box-shadow: 2px 2px 5px #000;
    -webkit-box-shadow: 2px 2px 5px #000;
    box-shadow: 2px 2px 5px #000;
}
|
||||||
|
|
||||||
|
div.footer a {
|
||||||
|
color: #336699;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- body styles ----------------------------------------------------------- */
|
||||||
|
|
||||||
|
p {
|
||||||
|
margin: 0.8em 0 0.5em 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
color: #336699;
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
a:hover {
|
||||||
|
color: #336699;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body a {
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1, h2, h3 {
|
||||||
|
color: #336699;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0.7em 0 0.3em 0;
|
||||||
|
font-size: 1.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
h2 {
|
||||||
|
margin: 1.3em 0 0.2em 0;
|
||||||
|
font-size: 1.35em;
|
||||||
|
padding-bottom: .5em;
|
||||||
|
border-bottom: 1px solid #336699;
|
||||||
|
}
|
||||||
|
|
||||||
|
h3 {
|
||||||
|
margin: 1em 0 -0.3em 0;
|
||||||
|
font-size: 1.2em;
|
||||||
|
padding-bottom: .3em;
|
||||||
|
border-bottom: 1px solid #CCCCCC;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.body h1 a, div.body h2 a, div.body h3 a,
|
||||||
|
div.body h4 a, div.body h5 a, div.body h6 a {
|
||||||
|
color: black!important;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 a.anchor, h2 a.anchor, h3 a.anchor,
|
||||||
|
h4 a.anchor, h5 a.anchor, h6 a.anchor {
|
||||||
|
display: none;
|
||||||
|
margin: 0 0 0 0.3em;
|
||||||
|
padding: 0 0.2em 0 0.2em;
|
||||||
|
color: #aaa!important;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor,
|
||||||
|
h5:hover a.anchor, h6:hover a.anchor {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover,
|
||||||
|
h5 a.anchor:hover, h6 a.anchor:hover {
|
||||||
|
color: #777;
|
||||||
|
background-color: #eee;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.headerlink {
|
||||||
|
color: #c60f0f!important;
|
||||||
|
font-size: 1em;
|
||||||
|
margin-left: 6px;
|
||||||
|
padding: 0 4px 0 4px;
|
||||||
|
text-decoration: none!important;
|
||||||
|
}
|
||||||
|
|
||||||
|
a.headerlink:hover {
|
||||||
|
background-color: #ccc;
|
||||||
|
color: white!important;
|
||||||
|
}
|
||||||
|
|
||||||
|
cite, code, tt {
|
||||||
|
font-family: 'Consolas', 'Deja Vu Sans Mono',
|
||||||
|
'Bitstream Vera Sans Mono', monospace;
|
||||||
|
font-size: 0.95em;
|
||||||
|
letter-spacing: 0.01em;
|
||||||
|
}
|
||||||
|
|
||||||
|
code {
|
||||||
|
background-color: #F2F2F2;
|
||||||
|
border-bottom: 1px solid #ddd;
|
||||||
|
color: #333;
|
||||||
|
}
|
||||||
|
|
||||||
|
code.descname, code.descclassname, code.xref {
|
||||||
|
border: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
hr {
|
||||||
|
border: 1px solid #abc;
|
||||||
|
margin: 2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
a code {
|
||||||
|
border: 0;
|
||||||
|
color: #CA7900;
|
||||||
|
}
|
||||||
|
|
||||||
|
a code:hover {
|
||||||
|
color: #2491CF;
|
||||||
|
}
|
||||||
|
|
||||||
|
pre {
|
||||||
|
background-color: transparent !important;
|
||||||
|
font-family: 'Consolas', 'Deja Vu Sans Mono',
|
||||||
|
'Bitstream Vera Sans Mono', monospace;
|
||||||
|
font-size: 0.95em;
|
||||||
|
letter-spacing: 0.015em;
|
||||||
|
line-height: 120%;
|
||||||
|
padding: 0.5em;
|
||||||
|
border-right: 5px solid #ccc;
|
||||||
|
border-left: 5px solid #ccc;
|
||||||
|
}
|
||||||
|
|
||||||
|
pre a {
|
||||||
|
color: inherit;
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
td.linenos pre {
|
||||||
|
padding: 0.5em 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.quotebar {
|
||||||
|
background-color: #f8f8f8;
|
||||||
|
max-width: 250px;
|
||||||
|
float: right;
|
||||||
|
padding: 2px 7px;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
}
|
||||||
|
nav.contents,
|
||||||
|
aside.topic,
|
||||||
|
|
||||||
|
div.topic {
|
||||||
|
background-color: #f8f8f8;
|
||||||
|
}
|
||||||
|
|
||||||
|
table {
|
||||||
|
border-collapse: collapse;
|
||||||
|
margin: 0 -0.5em 0 -0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
table td, table th {
|
||||||
|
padding: 0.2em 0.5em 0.2em 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition {
|
||||||
|
font-size: 0.9em;
|
||||||
|
margin: 1em 0 1em 0;
|
||||||
|
border: 3px solid #cccccc;
|
||||||
|
background-color: #f7f7f7;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition p {
|
||||||
|
margin: 0.5em 1em 0.5em 1em;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition li p {
|
||||||
|
margin-left: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition pre, div.warning pre {
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.highlight {
|
||||||
|
margin: 0.4em 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition p.admonition-title {
|
||||||
|
margin: 0;
|
||||||
|
padding: 0.1em 0 0.1em 0.5em;
|
||||||
|
color: white;
|
||||||
|
border-bottom: 3px solid #cccccc;
|
||||||
|
font-weight: bold;
|
||||||
|
background-color: #165e83;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.danger { border: 3px solid #f0908d; background-color: #f0cfa0; }
|
||||||
|
div.error { border: 3px solid #f0908d; background-color: #ede4cd; }
|
||||||
|
div.warning { border: 3px solid #f8b862; background-color: #f0cfa0; }
|
||||||
|
div.caution { border: 3px solid #f8b862; background-color: #ede4cd; }
|
||||||
|
div.attention { border: 3px solid #f8b862; background-color: #f3f3f3; }
|
||||||
|
div.important { border: 3px solid #f0cfa0; background-color: #ede4cd; }
|
||||||
|
div.note { border: 3px solid #f0cfa0; background-color: #f3f3f3; }
|
||||||
|
div.hint { border: 3px solid #bed2c3; background-color: #f3f3f3; }
|
||||||
|
div.tip { border: 3px solid #bed2c3; background-color: #f3f3f3; }
|
||||||
|
|
||||||
|
div.danger p.admonition-title, div.error p.admonition-title {
|
||||||
|
background-color: #b7282e;
|
||||||
|
border-bottom: 3px solid #f0908d;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.caution p.admonition-title,
|
||||||
|
div.warning p.admonition-title,
|
||||||
|
div.attention p.admonition-title {
|
||||||
|
background-color: #f19072;
|
||||||
|
border-bottom: 3px solid #f8b862;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.note p.admonition-title, div.important p.admonition-title {
|
||||||
|
background-color: #f8b862;
|
||||||
|
border-bottom: 3px solid #f0cfa0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.hint p.admonition-title, div.tip p.admonition-title {
|
||||||
|
background-color: #7ebea5;
|
||||||
|
border-bottom: 3px solid #bed2c3;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.admonition ul, div.admonition ol,
|
||||||
|
div.warning ul, div.warning ol {
|
||||||
|
margin: 0.1em 0.5em 0.5em 3em;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.versioninfo {
|
||||||
|
margin: 1em 0 0 0;
|
||||||
|
border: 1px solid #ccc;
|
||||||
|
background-color: #DDEAF0;
|
||||||
|
padding: 8px;
|
||||||
|
line-height: 1.3em;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.viewcode-back {
|
||||||
|
font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva',
|
||||||
|
'Verdana', sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.viewcode-block:target {
|
||||||
|
background-color: #f4debf;
|
||||||
|
border-top: 1px solid #ac9;
|
||||||
|
border-bottom: 1px solid #ac9;
|
||||||
|
}
|
||||||
|
|
||||||
|
p.versionchanged span.versionmodified {
|
||||||
|
font-size: 0.9em;
|
||||||
|
margin-right: 0.2em;
|
||||||
|
padding: 0.1em;
|
||||||
|
background-color: #DCE6A0;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.field-list > dt {
|
||||||
|
color: white;
|
||||||
|
background-color: #82A0BE;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.field-list > dd {
|
||||||
|
background-color: #f7f7f7;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- table styles ---------------------------------------------------------- */
|
||||||
|
|
||||||
|
table.docutils {
|
||||||
|
margin: 1em 0;
|
||||||
|
padding: 0;
|
||||||
|
border: 1px solid white;
|
||||||
|
background-color: #f7f7f7;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.docutils td, table.docutils th {
|
||||||
|
padding: 1px 8px 1px 5px;
|
||||||
|
border-top: 0;
|
||||||
|
border-left: 0;
|
||||||
|
border-right: 1px solid white;
|
||||||
|
border-bottom: 1px solid white;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.docutils td p {
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0.3em;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.field-list td, table.field-list th {
|
||||||
|
border: 0 !important;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.footnote td, table.footnote th {
|
||||||
|
border: 0 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
color: white;
|
||||||
|
text-align: left;
|
||||||
|
padding-right: 5px;
|
||||||
|
background-color: #82A0BE;
|
||||||
|
}
|
||||||
|
|
||||||
|
div.literal-block-wrapper div.code-block-caption {
|
||||||
|
background-color: #EEE;
|
||||||
|
border-style: solid;
|
||||||
|
border-color: #CCC;
|
||||||
|
border-width: 1px 5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* WIDE DESKTOP STYLE */
|
||||||
|
@media only screen and (min-width: 1176px) {
|
||||||
|
body {
|
||||||
|
margin: 0 40px 0 40px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TABLET STYLE */
|
||||||
|
@media only screen and (min-width: 768px) and (max-width: 991px) {
|
||||||
|
body {
|
||||||
|
margin: 0 40px 0 40px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MOBILE LAYOUT (PORTRAIT/320px) */
|
||||||
|
@media only screen and (max-width: 767px) {
|
||||||
|
body {
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
div.bodywrapper {
|
||||||
|
margin: 0;
|
||||||
|
width: 100%;
|
||||||
|
border: none;
|
||||||
|
}
|
||||||
|
div.sphinxsidebar {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* MOBILE LAYOUT (LANDSCAPE/480px) */
|
||||||
|
@media only screen and (min-width: 480px) and (max-width: 767px) {
|
||||||
|
body {
|
||||||
|
margin: 0 20px 0 20px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RETINA OVERRIDES */
|
||||||
|
@media
|
||||||
|
only screen and (-webkit-min-device-pixel-ratio: 2),
|
||||||
|
only screen and (min-device-pixel-ratio: 2) {
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- end ------------------------------------------------------------------- */
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
//
|
||||||
|
// bizstyle.js
|
||||||
|
// ~~~~~~~~~~~
|
||||||
|
//
|
||||||
|
// Sphinx javascript -- for bizstyle theme.
|
||||||
|
//
|
||||||
|
// This theme was created by referring to 'sphinxdoc'
|
||||||
|
//
|
||||||
|
// :copyright: Copyright 2012-2014 by Sphinx team, see AUTHORS.
|
||||||
|
// :license: BSD, see LICENSE for details.
|
||||||
|
//
|
||||||
|
/**
 * Adjusts the navigation ("related") bars of the bizstyle theme.
 *
 * - On iPhone/Android user agents the text of the `li.nav-item-0 a` link is
 *   replaced by "Top".
 * - In the first and the last `div.related` bar, every link outside
 *   `li.right` except the first one is truncated to 17 characters plus "..."
 *   when its text is longer than 20 characters; the full text is kept in the
 *   link's `title` attribute.
 *
 * Elements that are not present on the page are skipped.
 */
const initialiseBizStyle = () => {
    const MAX_LABEL_LENGTH = 20;
    const ELLIPSIS = "...";

    const userAgent = navigator.userAgent;
    if (userAgent.includes("iPhone") || userAgent.includes("Android")) {
        const rootLink = document.querySelector("li.nav-item-0 a");
        if (rootLink) {
            rootLink.textContent = "Top";
        }
    }

    const truncate = (link) => {
        const label = link.textContent;
        if (label.length > MAX_LABEL_LENGTH) {
            link.title = label;
            link.textContent = label.slice(0, MAX_LABEL_LENGTH - ELLIPSIS.length) + ELLIPSIS;
        }
    };

    // `:first` / `:last` are jQuery extensions, not CSS selectors: handing them
    // to querySelectorAll raises a SyntaxError. Select every bar and pick the
    // first and last one explicitly instead.
    const bars = Array.from(document.querySelectorAll("div.related"));
    // The Set keeps a page with a single bar from being processed twice.
    const targets = new Set(bars.length > 0 ? [bars[0], bars[bars.length - 1]] : []);
    for (const bar of targets) {
        // A NodeList has no slice(); convert it before skipping the first link.
        Array.from(bar.querySelectorAll(":scope ul li:not(.right) a"))
            .slice(1)
            .forEach(truncate);
    }
};
|
||||||
|
|
||||||
|
// On every resize, show "Top" as the root navigation link text on narrow
// viewports (<= 776px wide) and the full project title otherwise.
window.addEventListener("resize", () => {
    const rootLink = document.querySelector("li.nav-item-0 a");
    // Without this guard a page lacking the link would throw on every resize.
    if (!rootLink) {
        return;
    }
    rootLink.innerText = (window.innerWidth <= 776) ? "Top" : "QuaPy 0.1.7 documentation";
});

// Initialise right away when the document has finished loading its markup;
// otherwise defer until DOMContentLoaded.
if (document.readyState !== "loading") {
    initialiseBizStyle();
} else {
    document.addEventListener("DOMContentLoaded", initialiseBizStyle);
}
|
||||||
|
Before Width: | Height: | Size: 107 B |
|
|
@ -1 +0,0 @@
|
||||||
.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
|
|
||||||
|
Before Width: | Height: | Size: 434 KiB |
|
|
@ -0,0 +1,156 @@
|
||||||
|
/*
|
||||||
|
* doctools.js
|
||||||
|
* ~~~~~~~~~~~
|
||||||
|
*
|
||||||
|
* Base JavaScript utilities for all Sphinx HTML documentation.
|
||||||
|
*
|
||||||
|
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||||
|
* :license: BSD, see LICENSE for details.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
// Tag names of elements that consume key presses themselves; the keyboard
// shortcut handler ignores key events while one of these has focus.
const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set(["TEXTAREA", "INPUT", "SELECT", "BUTTON"]);
|
||||||
|
|
||||||
|
/**
 * Invoke `callback` once the document has finished loading.
 *
 * While the document is still in the "loading" state the callback is
 * registered as a DOMContentLoaded listener; in any other state it is called
 * synchronously.
 */
const _ready = (callback) => {
  const stillLoading = document.readyState === "loading";
  if (stillLoading) {
    document.addEventListener("DOMContentLoaded", callback);
    return;
  }
  callback();
};
|
||||||
|
|
||||||
|
/**
 * Small JavaScript module for the documentation.
 *
 * Bundles the i18n helpers (gettext / ngettext / addTranslations), the
 * search-bar focus helper, the domain index toggle buttons and the global
 * keyboard shortcuts.
 */
const Documentation = {
  /**
   * Set up the domain index toggles and the keyboard listeners.
   */
  init: () => {
    Documentation.initDomainIndexTable();
    Documentation.initOnKeyListeners();
  },

  /**
   * i18n support
   */
  TRANSLATIONS: {},
  PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
  LOCALE: "unknown",

  // gettext and ngettext don't access `this` so that the functions
  // can safely be bound to a different name (_ = Documentation.gettext)

  /**
   * Translate a single message.
   *
   * @param {string} string - message id to look up in TRANSLATIONS.
   * @returns {string} the message id itself when no translation exists, the
   *   translation when it is a string, otherwise the first (singular) entry
   *   of the translation tuple.
   */
  gettext: (string) => {
    const translated = Documentation.TRANSLATIONS[string];
    switch (typeof translated) {
      case "undefined":
        return string; // no translation
      case "string":
        return translated; // translation exists
      default:
        return translated[0]; // (singular, plural) translation tuple exists
    }
  },

  /**
   * Translate a message that has singular and plural forms.
   *
   * @param {string} singular - message id, also used when n === 1 and no
   *   translation exists.
   * @param {string} plural - untranslated plural form.
   * @param {number} n - count that selects the form.
   * @returns {string} the translated form selected by PLURAL_EXPR(n), or the
   *   untranslated singular/plural when the message has no translation.
   */
  ngettext: (singular, plural, n) => {
    const translated = Documentation.TRANSLATIONS[singular];
    if (typeof translated === "undefined") {
      return n === 1 ? singular : plural;
    }
    // A plain-string translation has no plural forms; indexing it would
    // return a single character.
    if (typeof translated === "string") {
      return translated;
    }
    // Catalog plural expressions such as "(n != 1)" evaluate to a boolean in
    // JavaScript; coerce to a numeric index so that `true` selects entry 1
    // instead of the non-existent property "true".
    return translated[Number(Documentation.PLURAL_EXPR(n))];
  },

  /**
   * Merge a message catalog into the module.
   *
   * @param {{messages: Object, plural_expr: string, locale: string}} catalog
   *   messages are merged into TRANSLATIONS, plural_expr replaces PLURAL_EXPR
   *   and locale replaces LOCALE.
   */
  addTranslations: (catalog) => {
    Object.assign(Documentation.TRANSLATIONS, catalog.messages);
    // NOTE(review): this compiles catalog.plural_expr as code. It is only
    // safe while catalogs come from trusted, build-generated files -- confirm
    // that no user-controlled catalog can reach this call.
    Documentation.PLURAL_EXPR = new Function(
      "n",
      `return (${catalog.plural_expr})`
    );
    Documentation.LOCALE = catalog.locale;
  },

  /**
   * helper function to focus on search bar
   */
  focusSearchBar: () => {
    document.querySelectorAll("input[name=q]")[0]?.focus();
  },

  /**
   * Initialise the domain index toggle buttons
   */
  initDomainIndexTable: () => {
    // Flip one toggle image between minus.png (expanded) and plus.png
    // (collapsed) and show/hide the rows of its group accordingly.
    const toggler = (el) => {
      // Strips the first seven characters of the element id; presumably the
      // ids look like "toggle-<n>" -- verify against the index template.
      const idNumber = el.id.slice(7);
      const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
      if (el.src.endsWith("minus.png")) {
        el.src = `${el.src.slice(0, -9)}plus.png`;
        toggledRows.forEach((row) => (row.style.display = "none"));
      } else {
        // Drops the trailing 8 characters ("plus.png") before appending.
        el.src = `${el.src.slice(0, -8)}minus.png`;
        toggledRows.forEach((row) => (row.style.display = ""));
      }
    };

    const togglerElements = document.querySelectorAll("img.toggler");
    togglerElements.forEach((el) =>
      el.addEventListener("click", (event) => toggler(event.currentTarget))
    );
    togglerElements.forEach((el) => (el.style.display = ""));
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
      togglerElements.forEach((el) => toggler(el));
    }
  },

  /**
   * Install the global keydown handler for previous/next page navigation
   * (ArrowLeft / ArrowRight) and the "/" search shortcut, depending on
   * DOCUMENTATION_OPTIONS.
   */
  initOnKeyListeners: () => {
    // only install a listener if it is really needed
    if (
      !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
      !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
    ) {
      return;
    }

    document.addEventListener("keydown", (event) => {
      // bail for input elements (activeElement may be null, hence `?.`)
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement?.tagName)) return;
      // bail with special keys
      if (event.altKey || event.ctrlKey || event.metaKey) return;

      if (!event.shiftKey) {
        switch (event.key) {
          case "ArrowLeft": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const prevLink = document.querySelector('link[rel="prev"]');
            if (prevLink && prevLink.href) {
              window.location.href = prevLink.href;
              event.preventDefault();
            }
            break;
          }
          case "ArrowRight": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const nextLink = document.querySelector('link[rel="next"]');
            if (nextLink && nextLink.href) {
              window.location.href = nextLink.href;
              event.preventDefault();
            }
            break;
          }
        }
      }

      // some keyboard layouts may need Shift to get /
      switch (event.key) {
        case "/":
          if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
          Documentation.focusSearchBar();
          event.preventDefault();
      }
    });
  },
};
|
||||||
|
|
||||||
|
// Short alias for the translation lookup, so call sites can write _("text").
const { gettext: _ } = Documentation;

// Initialise the documentation helpers as soon as the document is ready.
_ready(Documentation.init);
|
||||||