Compare commits
22 Commits
|
|
@ -1,108 +0,0 @@
|
|||
name: CI

# Runs on every pull request, on pushes to the two long-lived branches,
# and on version tags (which additionally trigger the release job).
on:
  pull_request:
  push:
    branches:
      - master
      - devel
    tags:
      - "[0-9]+.[0-9]+.[0-9]+"

jobs:

  # take out unit tests
  test:
    name: Unit tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Always quote versions: an unquoted 3.10 would be read as the float 3.1.
        python-version:
          - "3.11"
    env:
      # Quoted so the value reaches the process as the literal string "True"
      # instead of going through YAML boolean coercion.
      QUAPY_TESTS_OMIT_LARGE_DATASETS: "True"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@main"
          python -m pip install -e ".[bayes,tests]"
      - name: Test with unittest
        run: python -m unittest

  # build and push documentation to gh-pages (only if pushed to the master branch)
  docs:
    name: Documentation
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/master'
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted for consistency with the other jobs (and to avoid float parsing).
          python-version: "3.11"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel "jax[cpu]"
          python -m pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@main"
          python -m pip install -e ".[neural,docs]"
      - name: Build documentation
        run: sphinx-build -M html docs/source docs/build
      - name: Publish documentation
        run: |
          git clone ${{ github.server_url }}/${{ github.repository }}.git --branch gh-pages --single-branch __gh-pages/
          cp -r docs/build/html/* __gh-pages/
          cd __gh-pages/
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add .
          git commit -am "Documentation based on ${{ github.sha }}" || true
      # NOTE(review): this action is tracked on a moving branch (@master);
      # consider pinning it to a release tag or commit SHA.
      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          branch: gh-pages
          directory: __gh-pages/
          github_token: ${{ secrets.GITHUB_TOKEN }}

  # build the distribution, upload it to PyPI and create the GitHub release
  # (only for pushed version tags, and only once the unit tests have passed)
  release:
    name: Build & Publish Release
    runs-on: ubuntu-latest
    needs: test
    if: startsWith(github.ref, 'refs/tags/')
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip build twine
      - name: Build package
        run: python -m build
      # This step uploads to the production index; switch to the commented
      # values below to target TestPyPI instead.
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          # use these for TESTs!
          # password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          # repository-url: https://test.pypi.org/legacy/
          password: ${{ secrets.PYPI_API_TOKEN }}
          repository-url: https://upload.pypi.org/legacy/
      # NOTE(review): actions/create-release is no longer maintained upstream;
      # consider migrating to a maintained alternative.
      - name: Create GitHub Release
        id: create_release
        uses: actions/create-release@v1
        with:
          tag_name: ${{ github.ref_name }}
          release_name: Release ${{ github.ref_name }}
          body: |
            Changes in this release:
            - see commit history for details
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
name: Pylint

# Lints every tracked Python file on each push.
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted on purpose: an unquoted 3.10 would be parsed as the float 3.1.
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      # Same action versions as the repository's CI workflow (v4 / v5).
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint
      - name: Analysing the code with pylint
        run: |
          pylint $(git ls-files '*.py')
|
||||
|
|
@ -69,12 +69,8 @@ instance/
|
|||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# vscode config:
|
||||
.vscode/
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/doctest
|
||||
docs/_build/doctrees
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
|
@ -89,11 +85,6 @@ ipython_config.py
|
|||
# pyenv
|
||||
.python-version
|
||||
|
||||
# poetry
|
||||
poetry.toml
|
||||
pyproject.toml
|
||||
poetry.lock
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
|
|
@ -152,8 +143,7 @@ LeQua2022
|
|||
MultiLabel
|
||||
NewMethods
|
||||
Ordinal
|
||||
Retrieval
|
||||
eDiscovery
|
||||
Archived/eDiscovery
|
||||
poster-cikm
|
||||
slides-cikm
|
||||
slides-short-cikm
|
||||
|
|
@ -162,10 +152,4 @@ svm_perf_quantification/svm_struct
|
|||
svm_perf_quantification/svm_light
|
||||
TweetSentQuant
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
*.png
|
||||
.idea
|
||||
|
|
|
|||
|
|
@ -1,79 +1,7 @@
|
|||
Change Log 0.2.0
|
||||
-----------------
|
||||
|
||||
- Base code Refactor:
|
||||
- Removing coupling between LabelledCollection and quantification methods; the fit interface changes:
|
||||
def fit(data:LabelledCollection): -> def fit(X, y):
|
||||
- Adding function "predict" (function "quantify" is still present as an alias, for the nostalgic)
|
||||
- Aggregative methods' behavior in terms of fit_classifier and how to treat the val_split is now
|
||||
indicated exclusively at construction time, and it is no longer possible to indicate it at fit time.
|
||||
This is because, in v<=0.1.9, one could create a method (e.g., ACC) and then indicate:
|
||||
my_acc.fit(tr_data, fit_classifier=False, val_split=val_data)
|
||||
in which case the first argument is unused, and this was ambiguous with
|
||||
my_acc.fit(the_data, fit_classifier=False)
|
||||
in which case the_data is to be used for validation purposes. However, the val_split could be set as a fraction
|
||||
indicating only part of the_data must be used for validation, and the rest wasted... it was certainly confusing.
|
||||
- This change imposes a version constraint on qunfold, which now must be >= 0.1.6
|
||||
- EMQ has been modified, so that the representation function "classify" now only provides posterior
|
||||
probabilities and, if required, these are recalibrated (e.g., by "bcts") during the aggregation function.
|
||||
- A new parameter "on_calib_error" is passed to the constructor, which informs of the policy to follow
|
||||
in case the abstention's calibration functions failed (which happens sometimes). Options include:
|
||||
- 'raise': raises a RuntimeException (default)
|
||||
- 'backup': reruns by silently avoiding calibration
|
||||
- Parameter "recalib" has been renamed "calib"
|
||||
- Added aggregative bootstrap for deriving confidence regions (confidence intervals, ellipses in the simplex, or
|
||||
ellipses in the CLR space). This method is efficient as it leverages the two phases of the aggregative quantifiers.
|
||||
This method applies resampling only to the aggregation phase, thus avoiding the need to train many quantifiers, or to
|
||||
classify multiple times the instances of a sample. See:
|
||||
- quapy/method/confidence.py (new)
|
||||
- the new example no. 16.confidence_regions.py
|
||||
- BayesianCC moved to confidence.py, where methods having to do with confidence intervals belong.
|
||||
- Improved documentation of qp.plot module.
|
||||
|
||||
|
||||
Change Log 0.1.9
|
||||
----------------
|
||||
|
||||
- Added LeQua 2024 datasets and normalized match distance to qp.error
|
||||
|
||||
- Improved data loaders for UCI binary and UCI multiclass datasets (thanks to Lorenzo Volpi!); these datasets
|
||||
can be loaded with standardised covariates (default)
|
||||
|
||||
- Added a default classifier for aggregative quantifiers, which now can be instantiated without specifying
|
||||
the classifier. The default classifier can be accessed in qp.environ['DEFAULT_CLS'] and is assigned to
|
||||
sklearn.linear_model.LogisticRegression(max_iter=3000). If the classifier is not specified, then a clone
|
||||
of said classifier is returned. E.g.:
|
||||
> pacc = PACC()
|
||||
is equivalent to:
|
||||
> pacc = PACC(classifier=LogisticRegression(max_iter=3000))
|
||||
|
||||
- Improved error logging in model selection. In v0.1.8 only Status.INVALID was reported; in v0.1.9 it is
|
||||
now accompanied by a textual description of the error
|
||||
|
||||
- The number of parallel workers can now be set via an environment variable by running, e.g.:
|
||||
> N_JOBS=10 python3 your_script.py
|
||||
which has the same effect as writing the following code at the beginning of your_script.py:
|
||||
> import quapy as qp
|
||||
> qp.environ["N_JOBS"] = 10
|
||||
|
||||
- Some examples have been added to the ./examples/ dir, which now contains numbered examples from basics (0)
|
||||
to advanced topics (higher numbers)
|
||||
|
||||
- Moved the wiki documents to the ./docs/ folder so that they become editable via PR for the community
|
||||
|
||||
- Added Composable methods from Mirko Bunse's qunfold library! (thanks to Mirko Bunse!)
|
||||
|
||||
- Added Continuous Integration with GitHub Actions (thanks to Mirko Bunse!)
|
||||
|
||||
- Added Bayesian CC method (thanks to Pawel Czyz!). The method is described in detail in the paper
|
||||
Ziegler, Albert, and Paweł Czyż. "Bayesian Quantification with Black-Box Estimators."
|
||||
arXiv preprint arXiv:2302.09159 (2023).
|
||||
|
||||
- Removed binary UCI datasets {acute.a, acute.b, balance.2} from the list qp.data.datasets.UCI_BINARY_DATASETS
|
||||
(the datasets are still loadable from the fetch_UCIBinaryLabelledCollection and fetch_UCIBinaryDataset
|
||||
functions, though). The reason is that these datasets tend to yield results (for all methods) that are
|
||||
one or two orders of magnitude greater than for other datasets, and this has a disproportionate impact on
|
||||
the methods' average (I suspect there is something wrong in those datasets).
|
||||
<...>
|
||||
|
||||
|
||||
Change Log 0.1.8
|
||||
|
|
|
|||
50
README.md
|
|
@ -13,9 +13,9 @@ for facilitating the analysis and interpretation of the experimental results.
|
|||
|
||||
### Last updates:
|
||||
|
||||
* Version 0.2.0 is released! major changes can be consulted [here](CHANGE_LOG.txt).
|
||||
* The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/index.html)
|
||||
* Manuals are available [here](https://hlt-isti.github.io/QuaPy/manuals.html)
|
||||
* Version 0.1.8 is released! major changes can be consulted [here](CHANGE_LOG.txt).
|
||||
* A detailed documentation is now available [here](https://hlt-isti.github.io/QuaPy/)
|
||||
* The developer API documentation is available [here](https://hlt-isti.github.io/QuaPy/build/html/modules.html)
|
||||
|
||||
### Installation
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ pip install quapy
|
|||
|
||||
### Cite QuaPy
|
||||
|
||||
If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research:
|
||||
If you find QuaPy useful (and we hope you will), please consider citing the original paper in your research:
|
||||
|
||||
```
|
||||
@inproceedings{moreo2021quapy,
|
||||
|
|
@ -46,18 +46,19 @@ of the test set.
|
|||
|
||||
```python
|
||||
import quapy as qp
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
training, test = qp.datasets.fetch_UCIBinaryDataset("yeast").train_test
|
||||
dataset = qp.datasets.fetch_twitter('semeval16')
|
||||
|
||||
# create an "Adjusted Classify & Count" quantifier
|
||||
model = qp.method.aggregative.ACC()
|
||||
Xtr, ytr = training.Xy
|
||||
model.fit(Xtr, ytr)
|
||||
model = qp.method.aggregative.ACC(LogisticRegression())
|
||||
model.fit(dataset.training)
|
||||
|
||||
estim_prevalence = model.predict(test.X)
|
||||
true_prevalence = test.prevalence()
|
||||
estim_prevalence = model.quantify(dataset.test.instances)
|
||||
true_prevalence = dataset.test.prevalence()
|
||||
|
||||
error = qp.error.mae(true_prevalence, estim_prevalence)
|
||||
|
||||
print(f'Mean Absolute Error (MAE)={error:.3f}')
|
||||
```
|
||||
|
||||
|
|
@ -68,7 +69,7 @@ class prevalence of the training set. For this reason, any quantification model
|
|||
should be tested across many samples, even ones characterized by class prevalence
|
||||
values different or very different from those found in the training set.
|
||||
QuaPy implements sampling procedures and evaluation protocols that automate this workflow.
|
||||
See the [documentation](https://hlt-isti.github.io/QuaPy/manuals.html) for detailed examples.
|
||||
See the [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki) for detailed examples.
|
||||
|
||||
## Features
|
||||
|
||||
|
|
@ -80,8 +81,7 @@ quantification methods based on structured output learning, HDy, QuaNet, quantif
|
|||
* 32 UCI Machine Learning datasets.
|
||||
* 11 Twitter quantification-by-sentiment datasets.
|
||||
* 3 product reviews quantification-by-sentiment datasets.
|
||||
* 4 tasks from LeQua 2022 competition and 4 tasks from LeQua 2024 competition
|
||||
* IFCB for Plankton quantification
|
||||
* 4 tasks from LeQua competition (_new in v0.1.7!_)
|
||||
* Native support for binary and single-label multiclass quantification scenarios.
|
||||
* Model selection functionality that minimizes quantification-oriented loss functions.
|
||||
* Visualization tools for analysing the experimental results.
|
||||
|
|
@ -102,23 +102,19 @@ In case you want to contribute improvements to quapy, please generate pull reque
|
|||
|
||||
## Documentation
|
||||
|
||||
Check out the [developer API documentation here](https://hlt-isti.github.io/QuaPy/index.html).
|
||||
The [developer API documentation](https://hlt-isti.github.io/QuaPy/build/html/modules.html) is available [here](https://hlt-isti.github.io/QuaPy/build/html/index.html).
|
||||
|
||||
Check out the [Manuals](https://hlt-isti.github.io/QuaPy/manuals.html), in which many code examples
|
||||
Check out our [Wiki](https://github.com/HLT-ISTI/QuaPy/wiki), in which many examples
|
||||
are provided:
|
||||
|
||||
* [Datasets](https://hlt-isti.github.io/QuaPy/manuals/datasets.html)
|
||||
* [Evaluation](https://hlt-isti.github.io/QuaPy/manuals/evaluation.html)
|
||||
* [Protocols](https://hlt-isti.github.io/QuaPy/manuals/protocols.html)
|
||||
* [Methods](https://hlt-isti.github.io/QuaPy/manuals/methods.html)
|
||||
* [SVMperf](https://hlt-isti.github.io/QuaPy/manuals/explicit-loss-minimization.html)
|
||||
* [Model Selection](https://hlt-isti.github.io/QuaPy/manuals/model-selection.html)
|
||||
* [Plotting](https://hlt-isti.github.io/QuaPy/manuals/plotting.html)
|
||||
* [Datasets](https://github.com/HLT-ISTI/QuaPy/wiki/Datasets)
|
||||
* [Evaluation](https://github.com/HLT-ISTI/QuaPy/wiki/Evaluation)
|
||||
* [Protocols](https://github.com/HLT-ISTI/QuaPy/wiki/Protocols)
|
||||
* [Methods](https://github.com/HLT-ISTI/QuaPy/wiki/Methods)
|
||||
* [SVMperf](https://github.com/HLT-ISTI/QuaPy/wiki/ExplicitLossMinimization)
|
||||
* [Model Selection](https://github.com/HLT-ISTI/QuaPy/wiki/Model-Selection)
|
||||
* [Plotting](https://github.com/HLT-ISTI/QuaPy/wiki/Plotting)
|
||||
|
||||
## Acknowledgments:
|
||||
|
||||
<img src="docs/source/SoBigData.png" alt="SoBigData++" width="250"/>
|
||||
|
||||
This work has been supported by the QuaDaSh project
|
||||
_"Finanziato dall’Unione europea---Next Generation EU,
|
||||
Missione 4 Componente 2 CUP B53D23026250001"_.
|
||||
<img src="SoBigData.png" alt="SoBigData++" width="250"/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
import itertools
|
||||
import os.path
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
import quapy as qp
|
||||
from Retrieval.commons import RetrievedSamples, load_sample
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
from result_table.src.table import Table
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
|
||||
# Root directory under which each dataset has its own sub-folder
# (joined with the dataset name further down in this script).
data_home = 'data'

# Target attributes to evaluate; each name is used both as the sub-folder of
# `data_home` and as the label column passed to `load_sample`.
datasets = ['continent', 'gender', 'years_category', 'relative_pageviews_category', 'num_sitelinks_category']

# Hyper-parameter grid handed to GridSearchCV; the same grid is shared by both classifiers.
param_grid = {'C': np.logspace(-4, 4, 9), 'class_weight': ['balanced', None]}

# (short name, estimator, grid) triples; the short name becomes the "method" in the results table.
classifiers = [
    ('LR', LogisticRegression(max_iter=5000), param_grid),
    ('SVM', LinearSVC(), param_grid)
]
|
||||
|
||||
def benchmark_name(class_name):
    """
    Escapes underscores so that the name can be used verbatim in LaTeX.

    :param class_name: string, the raw dataset/attribute name
    :return: the same string with every '_' replaced by a backslash followed by '_'
    """
    # '\\_' spells the two characters backslash + underscore explicitly; the
    # former '\_' relied on an invalid escape sequence, which newer Python
    # versions warn about.
    return class_name.replace('_', '\\_')
|
||||
|
||||
# Results table: one row per dataset (benchmark), one column per classifier (method).
table = Table(name='accuracy', benchmarks=[benchmark_name(d) for d in datasets])
table.format.show_std = False
table.format.stat_test = None
table.format.lower_is_better = False
table.format.color = False
table.format.remove_zero = True
table.format.style = 'rules'

# Datasets form the outer loop and classifiers the inner loop, which is the
# same visiting order as itertools.product(datasets, classifiers). Loading and
# vectorizing depend only on the dataset, so they are done once per dataset
# instead of once per (dataset, classifier) pair.
for class_name in datasets:

    train_data_path = join(data_home, class_name, 'FULL', 'classifier_training.json')  # <-------- fixed classifier

    texts, labels = load_sample(train_data_path, class_name=class_name)

    tfidf = TfidfVectorizer(sublinear_tf=True, min_df=3)
    Xtr = tfidf.fit_transform(texts)
    print(f'Xtr shape={Xtr.shape}')

    for cls_name, cls, grid in classifiers:

        print('training classifier...', end='')
        classifier = GridSearchCV(
            cls,
            param_grid=grid,
            n_jobs=-1,
            cv=5,
            verbose=10
        )
        classifier.fit(Xtr, labels)
        classifier_acc = classifier.best_score_
        # Mean cross-validated test score of the best configuration (a single
        # number averaged over the folds, not one value per fold).
        classifier_acc_per_fold = classifier.cv_results_['mean_test_score'][classifier.best_index_]

        print(f'[done] best-params={classifier.best_params_} got {classifier_acc:.4f} score, per fold {classifier_acc_per_fold}')

        table.add(benchmark=benchmark_name(class_name), method=cls_name, v=classifier_acc_per_fold)

Table.LatexPDF('./latex/classifier_Acc.pdf', tables=[table])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
|
||||
import quapy.functional as F
|
||||
|
||||
|
||||
# Rank cut-offs; presumably the depths at which rankings are evaluated by the
# experiment scripts that import this module (not used in this file; TODO confirm).
Ks = [50, 100, 500, 1000]

# Attributes currently under study; the trailing comment lists further candidates that are disabled.
CLASS_NAMES = ['continent', 'gender', 'years_category'] # ['relative_pageviews_category', 'num_sitelinks_category']:

# Labels of the available training-pool sizes; presumably folder names such as
# the 'FULL' folder used by the experiment scripts (TODO confirm).
DATA_SIZES = ['10K', '50K', '100K', '500K', '1M', 'FULL']

# For each attribute, the label value treated as the protected (positive) group.
protected_group = {
    'gender': 'Female',
    'continent': 'Africa',
    'years_category': 'Pre-1900s',
}
|
||||
|
||||
|
||||
def load_sample(path, class_name):
    """
    Reads a JSON sample from disk and extracts its documents together with
    the labels of one attribute.

    :param path: location of the JSON file (parsed with pandas.read_json)
    :param class_name: name of the column holding the labels of interest
    :return: a tuple (texts, labels) with the values of the "text" column
        and of the `class_name` column, respectively
    """
    frame = pd.read_json(path)
    return frame['text'].values, frame[class_name].values
|
||||
|
||||
|
||||
def binarize_labels(labels, positive_class=None):
    """
    Maps labels to {0, 1}, with 1 marking the entries equal to `positive_class`.

    The input is never modified: a new integer array is returned. (The values
    handed in by callers may share memory with a DataFrame column, so writing
    the 0/1 codes into them in place could corrupt the original labels.)

    :param labels: array-like of labels
    :param positive_class: the label value to be encoded as 1; if None, no
        binarization takes place
    :return: `labels` itself when `positive_class` is None; otherwise a new
        integer array with 1 where the label equals `positive_class` and 0 elsewhere
    """
    if positive_class is None:
        return labels
    return (np.asarray(labels) == positive_class).astype(int)
|
||||
|
||||
|
||||
class RetrievedSamples:
    """
    Iterates over per-query retrieval samples.

    Calling an instance walks through the query files found in `class_home`
    (see `_list_queries`) and, for each one, yields the training sample, the
    test ranking of the same query, and the class prevalence of the query.
    """

    def __init__(self,
                 class_home: str,
                 test_rankings_path: str,
                 test_query_prevs_path: str,
                 vectorizer,
                 class_name,
                 positive_class=None,
                 classes=None,
                 ):
        """
        :param class_home: directory containing the per-query training files
        :param test_rankings_path: path to a json file with the test rankings (read with pandas)
        :param test_query_prevs_path: path to a json file with the per-query prevalence values
            (read with pandas)
        :param vectorizer: object with a `transform` method applied to the texts, or None to
            leave the texts untouched
        :param class_name: name of the column holding the labels
        :param positive_class: if given, labels are binarized against this value
        :param classes: if given, only documents whose (possibly binarized) label is in this
            collection are kept; also used to order the prevalence vector when
            `positive_class` is None
        """
        self.class_home = class_home
        self.test_rankings_df = pd.read_json(test_rankings_path)
        self.test_query_prevs_df = pd.read_json(test_query_prevs_path)
        self.vectorizer = vectorizer
        self.class_name = class_name
        self.positive_class = positive_class
        self.classes = classes

    def get_text_label_score(self, df, filter_rank=1000):
        """
        Extracts documents, labels and relevance scores from a ranking.

        :param df: dataframe with (at least) the columns "rank", "text", "score" and
            the column named `self.class_name`
        :param filter_rank: only rows with rank strictly below this value are kept
        :return: a tuple (texts, labels, scores), all sorted by decreasing score; the
            texts are vectorized if a vectorizer was provided
        """
        df = df[df['rank']<filter_rank]

        class_name = self.class_name
        vectorizer = self.vectorizer
        filter_classes = self.classes

        text = df.text.values
        labels = df[class_name].values
        rel_score = df.score.values

        labels = binarize_labels(labels, self.positive_class)

        # the class filter is applied to the labels as they are after binarization
        if filter_classes is not None:
            idx = np.isin(labels, filter_classes)
            text = text[idx]
            labels = labels[idx]
            rel_score = rel_score[idx]

        if vectorizer is not None:
            text = vectorizer.transform(text)

        order = np.argsort(-rel_score)
        return text[order], labels[order], rel_score[order]

    def __call__(self):
        """
        Generates one triple per query file.

        Queries whose training file is empty, or (when `positive_class` is set) whose
        prevalence record does not contain the positive class, are reported and skipped.

        :return: a generator of ((Xtr, ytr, score_tr), (Xte, yte, score_te), q_rel_prevs)
        """
        tests_df = self.test_rankings_df
        class_name = self.class_name

        for file in self._list_queries():

            # loads the training sample
            train_df = pd.read_json(file)
            if len(train_df) == 0:
                print('empty dataframe: ', file)
                continue

            Xtr, ytr, score_tr = self.get_text_label_score(train_df)

            # loads the test sample
            query_id = self._get_query_id_from_path(file)
            sel_df = tests_df[tests_df.qid == query_id]
            Xte, yte, score_te = self.get_text_label_score(sel_df)

            # gets the prevalence of all judged relevant documents for the query
            df = self.test_query_prevs_df
            q_rel_prevs = df.loc[df.id == query_id][class_name+'_proportions'].values[0]

            if self.positive_class is not None:
                if self.positive_class not in q_rel_prevs:
                    print(f'positive class {self.positive_class} not found in the query; skipping')
                    continue
                q_rel_prevs = F.as_binary_prevalence(q_rel_prevs[self.positive_class])
            else:
                # classes absent from the record are assigned prevalence 0
                q_rel_prevs = np.asarray([q_rel_prevs.get(class_i, 0.) for class_i in self.classes])

            yield (Xtr, ytr, score_tr), (Xte, yte, score_te), q_rel_prevs

    def _list_queries(self):
        """
        :return: the sorted list of per-query training files found in `self.class_home`
        """
        return sorted(glob(join(self.class_home, 'training_Query*200SPLIT.json')))

    # def _get_test_sample(self, query_id, max_lines=-1):
    #     df = self.test_rankings_df
    #     sel_df = df[df.qid==int(query_id)]
    #     return get_text_label_score(sel_df)
    #     texts = sel_df.text.values
    #     try:
    #         labels = sel_df[self.class_name].values
    #     except KeyError as e:
    #         print(f'error: key {self.class_name} not found in test rankings')
    #         raise e
    #     if max_lines > 0 and len(texts) > max_lines:
    #         ranks = sel_df.rank.values
    #         idx = np.argsort(ranks)[:max_lines]
    #         texts = np.asarray(texts)[idx]
    #         labels = np.asarray(labels)[idx]
    #     return texts, labels

    def total(self):
        """
        :return: the number of query files found in `self.class_home`
        """
        return len(self._list_queries())

    def _get_query_id_from_path(self, path):
        """
        Parses the integer query id out of a file name of the form
        "training_Query-<id>Sample-200SPLIT...".

        :param path: path to a per-query training file
        :return: the query id, as an int
        :raises ValueError: if the file name does not follow the expected pattern
        """
        from os.path import basename

        prefix = 'training_Query-'
        posfix = 'Sample-200SPLIT'
        # only the file name is inspected, so that directory names containing
        # either marker cannot interfere with the parsing
        qid = basename(path)
        qid = qid[:qid.index(posfix)]
        qid = qid[qid.index(prefix) + len(prefix):]
        qid = int(qid)
        return qid
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,182 @@
|
|||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
from Retrieval.commons import RetrievedSamples, load_txt_sample, load_json_sample
|
||||
from Retrieval.tabular import Table
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
|
||||
from quapy.protocol import AbstractProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
"""
|
||||
In this fifth experiment, we have pairs of (Li,Ui) with Li a training set and Ui a test set as
|
||||
in the fourth experiment, and the fairness groups are defined upon geographic info as in the fourth case.
|
||||
As in the fourth, the data Li and Ui have been drawn by retrieving query-related documents from
|
||||
a pool of the same size. Unlike the fourth experiment, here the training queries are
|
||||
|
||||
For now, 1000 in training and 100 in test
|
||||
It seems that there is now very little shift
|
||||
"""
|
||||
|
||||
|
||||
def cls(classifier_trained=None):
    """
    Provides the classifier a quantifier should be built upon.

    :param classifier_trained: an already available classifier, or None
    :return: `classifier_trained` itself when one is given; otherwise a new
        LogisticRegression instance
    """
    if classifier_trained is not None:
        return classifier_trained
    # return LinearSVC()
    return LogisticRegression()
|
||||
|
||||
|
||||
def methods(classifier_trained=None):
    """
    Generates the (name, quantifier) pairs to be evaluated.

    Every aggregative quantifier receives the classifier returned by
    `cls(classifier_trained)`, i.e., the given classifier when there is one.
    Quantifiers are instantiated lazily, one per iteration step. The
    commented-out lines are variants that are currently disabled.

    :param classifier_trained: classifier forwarded to `cls`, or None
    :return: a generator of tuples (method name, quantifier instance)
    """
    yield ('CC', ClassifyAndCount(cls(classifier_trained)))
    yield ('PCC', PCC(cls(classifier_trained)))
    yield ('ACC', ACC(cls(classifier_trained), val_split=5, n_jobs=-1))
    yield ('PACC', PACC(cls(classifier_trained), val_split=5, n_jobs=-1))
    yield ('EMQ', EMQ(cls(classifier_trained), exact_train_prev=True))
    yield ('EMQh', EMQ(cls(classifier_trained), exact_train_prev=False))
    # yield ('EMQ-BCTS', EMQ(cls(classifier_trained), exact_train_prev=False, recalib='bcts'))
    # yield ('EMQ-TS', EMQ(cls(classifier_trained), exact_train_prev=False, recalib='ts'))
    # yield ('EMQ-NBVS', EMQ(cls(classifier_trained), exact_train_prev=False, recalib='nbvs'))
    # yield ('EMQ-VS', EMQ(cls(classifier_trained), exact_train_prev=False, recalib='vs'))
    # yield ('KDE001', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.001))
    # yield ('KDE005', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.005)) # <-- wow!
    # yield ('KDE01', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.01))
    # yield ('KDE02', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.02))
    # yield ('KDE03', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.03))
    # yield ('KDE05', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.05))
    yield ('KDE07', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.07))
    # yield ('KDE10', KDEyML(cls(classifier_trained), val_split=5, n_jobs=-1, bandwidth=0.10))
    # the only entry that is not built upon a classifier
    yield ('MLPE', MaximumLikelihoodPrevalenceEstimation())
|
||||
|
||||
|
||||
def train_classifier():
    """
    Fits a TF-IDF vectorizer and a grid-searched logistic regression on the
    training file.

    Reads the module-level names `train_path`, `CLASS_NAME` and `REDUCE_TR`,
    which must be set before calling. When `REDUCE_TR` is positive and the
    training set is larger than it, the set is first subsampled to `REDUCE_TR`
    documents at its own prevalence.

    :return: a tuple (tfidf, classifier_trained) with the fitted vectorizer and
        the best estimator found by the grid search
    """
    tfidf = TfidfVectorizer(sublinear_tf=True, min_df=10)
    training = LabelledCollection.load(train_path, loader_func=load_json_sample, class_name=CLASS_NAME)

    if REDUCE_TR > 0 and len(training) > REDUCE_TR:
        print('Reducing the number of documents in the training to', REDUCE_TR)
        training = training.sampling(REDUCE_TR, *training.prevalence())

    Xtr, ytr = training.Xy
    Xtr = tfidf.fit_transform(Xtr)
    print('L orig shape = ', Xtr.shape)

    # rebuilt on the vectorized documents; only used for the report printed below
    training = LabelledCollection(Xtr, ytr)

    print('training classifier')
    grid_search = GridSearchCV(
        LogisticRegression(),
        param_grid={'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]},
        n_jobs=-1,
        cv=5
    )
    grid_search.fit(Xtr, ytr)
    classifier_trained = grid_search.best_estimator_
    print('[Done!]')

    print('training classes:', training.classes_)
    print('training prevalence:', training.prevalence())

    return tfidf, classifier_trained
|
||||
|
||||
|
||||
def reduceAtK(data: LabelledCollection, k):
    """
    Truncates a collection to its first k elements.

    :param data: the LabelledCollection to cut
    :param k: number of leading instances (and labels) to retain
    :return: a new LabelledCollection with the first k instances and labels of
        `data`, declared over the same classes as `data`
    """
    instances, labels = data.Xy
    return LabelledCollection(instances[:k], labels[:k], classes=data.classes_)
|
||||
|
||||
|
||||
# Value passed as `max_test_lines` to the protocol further down and also used as the
# QuaPy sample size; presumably -1 means "no limit" (TODO confirm against RetrievedSamples).
RANK_AT_K = -1
# Maximum number of training documents; train_classifier subsamples larger training sets to this size.
REDUCE_TR = 50000
qp.environ['SAMPLE_SIZE'] = RANK_AT_K
|
||||
|
||||
|
||||
def scape_latex(string):
    """
    Escapes underscores so that the string can be used verbatim in LaTeX.

    :param string: the raw text
    :return: the same text with every '_' replaced by a backslash followed by '_'
    """
    # '\\_' spells the two characters backslash + underscore explicitly; the
    # former '\_' relied on an invalid escape sequence, which newer Python
    # versions warn about.
    return string.replace('_', '\\_')
|
||||
|
||||
|
||||
# Rank cut-offs: every test ranking is truncated to its first k elements (reduceAtK) and evaluated.
Ks = [10, 50, 100, 250, 500, 1000, 2000]
# Ks = [500]

for CLASS_NAME in ['gender_category'] : #'years_category']: #['continent', 'first_letter_category']: #, 'gender', 'gender_category', 'occupations', 'source_countries', 'source_subcont_regions', 'years_category', 'relative_pageviews_category']:

    # each attribute has its data in a folder named after it, relative to the working directory
    data_path = './' + CLASS_NAME

    # the name of the training file depends on the attribute
    if CLASS_NAME in ['years_category', 'continent', 'gender_category']:
        train_path = join(data_path, 'train500PerGroup.json')
    else:
        train_path = join(data_path, 'train3000samples.json')

    # presumably the result of train_classifier is cached in (and reloaded from) the
    # pickle file -- TODO confirm against qp.util.pickled_resource
    tfidf, classifier_trained = qp.util.pickled_resource(f'classifier_{CLASS_NAME}.pkl', train_classifier)
    # a classifier is always available at this point, so the quantifiers below are fit
    # with fit_classifier=False
    trained=True

    # NOTE(review): these keyword arguments (load_fn, max_train_lines, max_test_lines) belong
    # to a RetrievedSamples interface that is not visible here -- verify against Retrieval.commons
    experiment_prot = RetrievedSamples(data_path,
                                       load_fn=load_json_sample,
                                       vectorizer=tfidf,
                                       max_train_lines=None,
                                       max_test_lines=RANK_AT_K, classes=classifier_trained.classes_, class_name=CLASS_NAME)

    # one benchmark (table row) per cut-off k; one table per error measure
    method_names = [name for name, *other in methods()]
    benchmarks = [f'{scape_latex(CLASS_NAME)}@{k}' for k in Ks]
    table_mae = Table(benchmarks, method_names, color_mode='global')
    table_mrae = Table(benchmarks, method_names, color_mode='global')

    for method_name, quantifier in methods(classifier_trained):
        # print('Starting with method=', method_name)

        # errors collected across queries, separately for each cut-off
        mae_errors = {k:[] for k in Ks}
        mrae_errors = {k:[] for k in Ks}

        # NOTE(review): the total of 49 is hard-coded and only affects the progress bar --
        # confirm it matches the number of queries
        pbar = tqdm(experiment_prot(), total=49)
        for train, test in pbar:
            if train is not None:
                try:
                    # MLPE has no classifier, hence it is fit without the extra arguments
                    if trained and method_name!='MLPE':
                        quantifier.fit(train, val_split=train, fit_classifier=False)
                    else:
                        quantifier.fit(train)

                    for k in Ks:
                        test_k = reduceAtK(test, k)
                        estim_prev = quantifier.quantify(test_k.instances)

                        mae_errors[k].append(qp.error.mae(test_k.prevalence(), estim_prev))
                        # the smoothing factor is set to 1/(2k)
                        mrae_errors[k].append(qp.error.mrae(test_k.prevalence(), estim_prev, eps=(1./(2*k))))

                # NOTE(review): any failure is reported and the query is skipped for this method
                except Exception as e:
                    print(f'wow, something happened here! skipping; {e}')
            else:
                print('skipping one!')

            # pbar.set_description(f'{method_name}\tmae={np.mean(mae_errors):.4f}\tmrae={np.mean(mrae_errors):.4f}')
            pbar.set_description(f'{method_name}')

        # stores the per-query errors of this method, one table cell per cut-off
        for k in Ks:

            table_mae.add(benchmark=f'{scape_latex(CLASS_NAME)}@{k}', method=method_name, values=mae_errors[k])
            table_mrae.add(benchmark=f'{scape_latex(CLASS_NAME)}@{k}', method=method_name, values=mrae_errors[k])

    # the tables of this attribute are written out once all methods have been evaluated
    table_mae.latexPDF('./latex', f'table_{CLASS_NAME}_mae.tex')
    table_mrae.latexPDF('./latex', f'table_{CLASS_NAME}_mrae.tex')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,161 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
from Retrieval.commons import RetrievedSamples, load_txt_sample
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
|
||||
from quapy.protocol import AbstractProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
"""
|
||||
In this fourth experiment, we have pairs of (Li,Ui) with Li a training set and Ui a test set as
|
||||
in the third experiment, and the fairness groups are defined upon geographic info as in the third case.
|
||||
The difference here is that the data Li and Ui have been drawn by retrieving query-related documents from
|
||||
a pool of the same size.
|
||||
|
||||
For now, 1000 documents in training and 100 in test.
|
||||
It seems that there is now very little shift.
|
||||
"""
|
||||
|
||||
def cls(classifier_trained=None):
    """
    Selects the classifier that a quantifier will wrap.

    :param classifier_trained: an already available classifier, or None
    :return: `classifier_trained` itself when it is not None; otherwise a new LogisticRegression
        with default hyperparameters
    """
    # alternative default left disabled: LinearSVC()
    return LogisticRegression() if classifier_trained is None else classifier_trained
|
||||
|
||||
|
||||
def methods(classifier_trained=None):
    """
    Lazily generates the (name, quantifier) pairs compared in this experiment.

    Every aggregative quantifier receives the result of its own call to `cls(classifier_trained)`,
    i.e., the given classifier when there is one and a fresh LogisticRegression otherwise.

    :param classifier_trained: classifier forwarded to `cls`, or None
    :return: a generator of tuples (method_name, quantifier)
    """
    def base():
        return cls(classifier_trained)

    yield ('CC', ClassifyAndCount(base()))
    yield ('PACC', PACC(base(), val_split=5, n_jobs=-1))
    yield ('EMQ', EMQ(base(), exact_train_prev=True))
    yield ('EMQh', EMQ(base(), exact_train_prev=False))

    # EMQ variants that differ only in the recalibration strategy ('vs' is currently disabled)
    recalibrated = (
        ('EMQ-BCTS', 'bcts'),
        ('EMQ-TS', 'ts'),
        ('EMQ-NBVS', 'nbvs'),
    )
    for name, recalib in recalibrated:
        yield (name, EMQ(base(), exact_train_prev=False, recalib=recalib))

    yield ('PCC', PCC(base()))
    yield ('ACC', ACC(base(), val_split=5, n_jobs=-1))

    # KDEy-ML evaluated on a grid of bandwidths; the original listing marked 0.005 with "<-- wow!"
    kde_grid = (
        ('KDE001', 0.001),
        ('KDE005', 0.005),
        ('KDE01', 0.01),
        ('KDE02', 0.02),
        ('KDE03', 0.03),
        ('KDE05', 0.05),
        ('KDE07', 0.07),
        ('KDE10', 0.10),
    )
    for name, bandwidth in kde_grid:
        yield (name, KDEyML(base(), val_split=5, n_jobs=-1, bandwidth=bandwidth))

    yield ('MLPE', MaximumLikelihoodPrevalenceEstimation())
|
||||
|
||||
|
||||
def train_classifier():
    """
    Trains the tfidf vectorizer and the classifier shared by all quantifiers of this experiment.

    Reads the module-level settings `train_path` (file to load) and `REDUCE_TR` (when > 0, the
    training set is resampled to that many documents, keeping its own class prevalence).
    The classifier is a Logistic Regression whose hyperparameters C (np.logspace(-3, 3, 7)) and
    class_weight ({'balanced', None}) are chosen by 5-fold cross-validated grid search.

    :return: a tuple (tfidf, classifier_trained) with the fitted vectorizer and the best estimator
    """
    training = LabelledCollection.load(train_path, loader_func=load_txt_sample, verbose=True, parse_columns=False)

    if REDUCE_TR > 0:
        print('Reducing the number of documents in the training to', REDUCE_TR)
        training = training.sampling(REDUCE_TR, *training.prevalence())

    tfidf = TfidfVectorizer(sublinear_tf=True, min_df=10)
    Xtr, ytr = training.Xy
    Xtr = tfidf.fit_transform(Xtr)
    print('L orig shape = ', Xtr.shape)

    # re-wrapped only to report the classes and prevalence of the vectorized training set below
    training = LabelledCollection(Xtr, ytr)

    print('training classifier')
    model_selection = GridSearchCV(
        LogisticRegression(),
        param_grid={'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]},
        n_jobs=-1,
        cv=5
    )
    model_selection.fit(Xtr, ytr)
    classifier_trained = model_selection.best_estimator_
    # the former local assignment `trained = True` was dropped: it never reached the module-level
    # flag of the same name, which the script sets itself after calling this function
    print('[Done!]')

    print('training classes:', training.classes_)
    print('training prevalence:', training.prevalence())

    return tfidf, classifier_trained
|
||||
|
||||
|
||||
|
||||
# ---- configuration ----
RANK_AT_K = 1000   # forwarded to the protocol as max_test_lines and used as quapy's SAMPLE_SIZE
REDUCE_TR = 50000  # read by train_classifier(): size the training set is resampled to when > 0
qp.environ['SAMPLE_SIZE'] = RANK_AT_K

data_path = './50_50_split_trec'
train_path = join(data_path, 'train_50_50_continent.txt')

# ---- classifier: obtained through qp.util.pickled_resource with 'classifier.pkl' as the pickle path ----
tfidf, classifier_trained = qp.util.pickled_resource('classifier.pkl', train_classifier)
trained=True  # quantifiers are fit with fit_classifier=False whenever this flag is set

experiment_prot = RetrievedSamples(
    data_path,
    load_fn=load_txt_sample,
    vectorizer=tfidf,
    max_train_lines=None,
    max_test_lines=RANK_AT_K,
    classes=classifier_trained.classes_
)

# ---- evaluation: every method is run on every (train, test) pair produced by the protocol ----
result_mae_dict = {}
result_mrae_dict = {}
for method_name, quantifier in methods(classifier_trained):
    # print('Starting with method=', method_name)

    mae_errors = []
    mrae_errors = []
    pbar = tqdm(experiment_prot(), total=49)
    for train, test in pbar:
        if train is None:
            print('skipping one!')
        else:
            try:
                # print(train.prevalence())
                # print(test.prevalence())
                if trained and method_name!='MLPE':
                    # reuse the pre-trained classifier; the query-specific sample acts as validation data
                    quantifier.fit(train, val_split=train, fit_classifier=False)
                else:
                    quantifier.fit(train)
                estim_prev = quantifier.quantify(test.instances)

                mae = qp.error.mae(test.prevalence(), estim_prev)
                mae_errors.append(mae)

                mrae = qp.error.mrae(test.prevalence(), estim_prev)
                mrae_errors.append(mrae)

                # print()
                # print('Training prevalence:', F.strprev(train.prevalence()), 'shape', train.X.shape)
                # print('Test prevalence:', F.strprev(test.prevalence()), 'shape', test.X.shape)
                # print('Estim prevalence:', F.strprev(estim_prev))

            except Exception as e:
                # best-effort run: a failing query is reported and left out of the averages
                print(f'wow, something happened here! skipping; {e}')

        pbar.set_description(f'{method_name}\tmae={np.mean(mae_errors):.4f}\tmrae={np.mean(mrae_errors):.4f}')
    print()
    result_mae_dict[method_name] = np.mean(mae_errors)
    result_mrae_dict[method_name] = np.mean(mrae_errors)

# ---- final report ----
print('Results\n'+('-'*100))
for method_name, MAE in result_mae_dict.items():
    MRAE = result_mrae_dict[method_name]
    # the variable names are part of the output ("MAE=...", "MRAE=...")
    print(f'{method_name}\t{MAE=:.5f}\t{MRAE=:.5f}')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC
|
||||
from quapy.protocol import AbstractProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
|
||||
"""
|
||||
This was the very first experiment. 1 big training set and many test rankings produced according to some queries.
|
||||
The quantification methods did not seem to work. The more sophisticated the method is, the worse it performed.
|
||||
This is a clear indication that the PPS assumptions do not hold.
|
||||
Actually, while the training set could be some iid sample from a distribution L and every test set
|
||||
is an iid sample from a distribution U, it is pretty clear that P(X|Y) is different, since the test sets
|
||||
are biased towards a query term whereas the training set is not.
|
||||
"""
|
||||
|
||||
def methods():
    """
    Lazily generates the (name, quantifier) pairs evaluated in this experiment.

    MLPE comes first; every aggregative quantifier that follows wraps its own
    LogisticRegression(n_jobs=-1).

    :return: a generator of tuples (method_name, quantifier)
    """
    yield ('MLPE', MaximumLikelihoodPrevalenceEstimation())

    aggregative = (
        ('CC', ClassifyAndCount),
        ('ACC', ACC),
        ('PCC', PCC),
        ('PACC', PACC),
        ('EMQ', EMQ),
    )
    for name, quantifier_class in aggregative:
        yield (name, quantifier_class(LogisticRegression(n_jobs=-1)))
|
||||
|
||||
|
||||
def load_txt_sample(path, verbose=False):
    """
    Loads a sample from a tab-separated file.

    :param path: path to a file that pandas can read with sep='\\t' and that has the columns
        'text' and 'first_letter_category'
    :param verbose: if True, prints a message before and after reading the file
    :return: a tuple (X, y) with the 'text' and the 'first_letter_category' columns (pandas Series)
    """
    if verbose:
        print(f'loading {path}...', end='')
    frame = pd.read_csv(path, sep='\t')
    if verbose:
        print('[done]')

    return frame['text'], frame['first_letter_category']
|
||||
|
||||
class RetrievedSamples(AbstractProtocol):
    """
    Protocol that turns every file matching 'test_data_*.txt' in a directory into one test sample.

    :param path_dir: directory in which the test files are searched
    :param load_fn: callable that receives a file path and returns (X, y)
    :param vectorizer: fitted object whose `transform` is applied to the loaded X
    :param classes: class labels passed to every LabelledCollection that is created
    """

    def __init__(self, path_dir: str, load_fn, vectorizer, classes):
        self.path_dir = path_dir
        self.load_fn = load_fn
        self.vectorizer = vectorizer
        self.classes = classes

    def __call__(self):
        """
        Generates, for each test file, the value of the `Xp` attribute of the corresponding
        LabelledCollection.
        """
        pattern = join(self.path_dir, 'test_data_*.txt')
        for file in glob(pattern):
            X, y = self.load_fn(file)
            # an unexpected size is only reported; the sample is processed anyway
            if len(X)!=qp.environ['SAMPLE_SIZE']:
                print(f'[warning]: file {file} contains {len(X)} instances (expected: {qp.environ["SAMPLE_SIZE"]})')
            # assert len(X) == qp.environ['SAMPLE_SIZE'], f'unexpected sample size for file {file}, found {len(X)}'
            vectors = self.vectorizer.transform(X)
            yield LabelledCollection(vectors, y, classes=self.classes).Xp
|
||||
|
||||
|
||||
# ---- configuration ----
qp.environ['SAMPLE_SIZE']=100

data_path = './data'
train_path = join(data_path, 'train_data.txt')

# ---- training data: loaded as raw text, then replaced by its tfidf representation ----
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5)

training = LabelledCollection.load(train_path, loader_func=load_txt_sample, verbose=True)

# training = training.sampling(1000)

Xtr, ytr = training.Xy
Xtr = tfidf.fit_transform(Xtr)
print('Xtr shape = ', Xtr.shape)

training = LabelledCollection(Xtr, ytr)
classes = training.classes_

# ---- test protocol: one sample per test file found in data_path ----
test_prot = RetrievedSamples(data_path, load_fn=load_txt_sample, vectorizer=tfidf, classes=classes)

print('Training prevalence:', F.strprev(training.prevalence()))
for X, p in test_prot():
    print('Test prevalence:', F.strprev(p))

# ---- evaluation: each method is trained once on the whole training set ----
for method_name, quantifier in methods():
    print('training ', method_name)
    quantifier.fit(training)
    print('[done]')

    report = qp.evaluation.evaluation_report(
        quantifier,
        test_prot,
        error_metrics=['mae', 'mrae'],
        verbose=True
    )

    print(report.mean())
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC
|
||||
from quapy.protocol import AbstractProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
"""
|
||||
In this second experiment, we have pairs of (Li,Ui) with Li a training set and Ui a test set.
|
||||
Both elements in the pair are *retrieved according to the same query*. This is a way to impose
|
||||
the same type of bias that was present in the test, to the training set. Let's see...
|
||||
"""
|
||||
|
||||
def methods():
    """
    Lazily generates the (name, quantifier) pairs evaluated in this experiment.

    Each aggregative quantifier wraps its own default LogisticRegression; PACC and ACC are
    additionally given val_split=5 and n_jobs=-1.

    :return: a generator of tuples (method_name, quantifier)
    """
    cross_val = dict(val_split=5, n_jobs=-1)
    aggregative = (
        ('PACC', PACC, cross_val),
        ('CC', ClassifyAndCount, {}),
        ('EMQ', EMQ, {}),
        ('PCC', PCC, {}),
        ('ACC', ACC, cross_val),
    )
    for name, quantifier_class, kwargs in aggregative:
        yield (name, quantifier_class(LogisticRegression(), **kwargs))

    yield ('MLPE', MaximumLikelihoodPrevalenceEstimation())
|
||||
|
||||
|
||||
def load_txt_sample(path, parse_columns, verbose=False, max_lines=None):
    """
    Loads a sample from a tab-separated file, optionally sorted by rank and truncated.

    :param path: path to a file that pandas can read with sep='\\t' and that has the columns
        'text' and 'first_letter_category'
    :param parse_columns: if True, the columns 'rank' and 'score' are also read, and the
        sample is reordered by increasing 'rank'
    :param verbose: if True, prints a message before and after reading the file
    :param max_lines: if not None, only the first `max_lines` items (after the optional
        reordering) are kept
    :return: a tuple (X, y) of numpy arrays with the texts and their labels
    """
    if verbose:
        print(f'loading {path}...', end='')
    frame = pd.read_csv(path, sep='\t')
    if verbose:
        print('[done]')

    texts = frame['text'].values
    labels = frame['first_letter_category'].values

    if parse_columns:
        ranks = frame['rank'].values
        # the 'score' column is accessed (so a file without it raises) but its values are not returned
        _ = frame['score'].values
        by_rank = np.argsort(ranks)
        texts, labels = texts[by_rank], labels[by_rank]

    if max_lines is not None:
        texts, labels = texts[:max_lines], labels[:max_lines]

    return texts, labels
|
||||
|
||||
|
||||
class RetrievedSamples(AbstractProtocol):
    """
    Protocol that generates (training sample, test sample) pairs, one per file matching
    'test_rankings_*.txt' in a directory. The training file of a pair is located by replacing
    'test_' with 'training_' in the path of the test file.

    :param path_dir: directory in which the test ranking files are searched
    :param load_fn: callable invoked as load_fn(path, parse_columns=True, max_lines=...) that
        returns (X, y)
    :param vectorizer: fitted object whose `transform` is applied to the loaded X
    :param classes: class labels passed to every LabelledCollection that is created
    :param max_train_lines: value of `max_lines` used when loading training files
    :param max_test_lines: value of `max_lines` used when loading test files
    """

    def __init__(self, path_dir: str, load_fn, vectorizer, classes, max_train_lines=None, max_test_lines=None):
        self.path_dir = path_dir
        self.load_fn = load_fn
        self.vectorizer = vectorizer
        self.classes = classes
        self.max_train_lines = max_train_lines
        self.max_test_lines = max_test_lines

    def __call__(self):
        """
        Generates the pairs (train_sample, test_sample) as LabelledCollection instances.
        """
        for file in glob(join(self.path_dir, 'test_rankings_*.txt')):

            # training sample retrieved for the same query
            train_file = file.replace('test_', 'training_')
            X, y = self.load_fn(train_file, parse_columns=True, max_lines=self.max_train_lines)
            train_sample = LabelledCollection(self.vectorizer.transform(X), y, classes=self.classes)

            # test sample; an unexpected size is only reported
            X, y = self.load_fn(file, parse_columns=True, max_lines=self.max_test_lines)
            if len(X)!=qp.environ['SAMPLE_SIZE']:
                print(f'[warning]: file {file} contains {len(X)} instances (expected: {qp.environ["SAMPLE_SIZE"]})')
            # assert len(X) == qp.environ['SAMPLE_SIZE'], f'unexpected sample size for file {file}, found {len(X)}'
            test_sample = LabelledCollection(self.vectorizer.transform(X), y, classes=self.classes)

            yield train_sample, test_sample
|
||||
|
||||
|
||||
# ---- configuration ----
RANK_AT_K = 500    # max number of lines kept from every training/test ranking; also quapy's SAMPLE_SIZE
REDUCE_TR = 50000  # when > 0, the big training set is resampled to this many documents
qp.environ['SAMPLE_SIZE'] = RANK_AT_K

data_path = './newCollection'
train_path = join(data_path, 'train_data.txt')

# ---- big training set: used here to fit the tfidf vectorizer and to obtain the classes ----
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=10)

training = LabelledCollection.load(train_path, loader_func=load_txt_sample, verbose=True, parse_columns=False)
if REDUCE_TR>0:
    print('Reducing the number of documents in the training to', REDUCE_TR)
    training = training.sampling(REDUCE_TR)

Xtr, ytr = training.Xy
Xtr = tfidf.fit_transform(Xtr)
print('L orig shape = ', Xtr.shape)

training = LabelledCollection(Xtr, ytr)
classes = training.classes_

# ---- protocol generating the query-specific (train, test) pairs ----
experiment_prot = RetrievedSamples(
    data_path,
    load_fn=load_txt_sample,
    vectorizer=tfidf,
    classes=classes,
    max_train_lines=RANK_AT_K,
    max_test_lines=RANK_AT_K
)

# ---- evaluation: every method is re-trained on the training sample of each pair ----
for method_name, quantifier in methods():
    print('Starting with method=', method_name)

    errors = []
    pbar = tqdm(experiment_prot(), total=49)
    for train, test in pbar:
        # print('Training prevalence:', F.strprev(training.prevalence()), 'shape', train.X.shape)
        # print('Test prevalence:', F.strprev(test.prevalence()), 'shape', test.X.shape)

        quantifier.fit(train)
        estim_prev = quantifier.quantify(test.instances)
        errors.append(qp.error.mae(test.prevalence(), estim_prev))

        # running mean of the absolute error observed so far for this method
        pbar.set_description(f'mae={np.mean(errors):.4f}')
    print()
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC
|
||||
from quapy.protocol import AbstractProtocol
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from glob import glob
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
"""
|
||||
In this third experiment, we have pairs of (Li,Ui) with Li a training set and Ui a test set as
|
||||
in the second experiment, but in this case the fairness groups are defined upon geographic info.
|
||||
"""
|
||||
|
||||
def methods():
    """
    Lazily generates the (name, quantifier) pairs evaluated in this experiment.

    Each aggregative quantifier wraps its own default LogisticRegression; PACC and ACC are
    additionally given val_split=5 and n_jobs=-1.

    :return: a generator of tuples (method_name, quantifier)
    """
    cross_val = dict(val_split=5, n_jobs=-1)
    aggregative = (
        ('CC', ClassifyAndCount, {}),
        ('PACC', PACC, cross_val),
        ('EMQ', EMQ, {}),
        ('PCC', PCC, {}),
        ('ACC', ACC, cross_val),
    )
    for name, quantifier_class, kwargs in aggregative:
        yield (name, quantifier_class(LogisticRegression(), **kwargs))

    yield ('MLPE', MaximumLikelihoodPrevalenceEstimation())
|
||||
|
||||
|
||||
def load_txt_sample(path, parse_columns, verbose=False, max_lines=None):
    """
    Loads a sample from a tab-separated file, discarding the items labelled 'Antarctica',
    and optionally sorting by rank and truncating.

    :param path: path to a file that pandas can read with sep='\\t' and that has the columns
        'text' and 'continent'
    :param parse_columns: if True, the columns 'rank' and 'score' are also read, and the
        sample is reordered by increasing 'rank'
    :param verbose: if True, prints a message before and after reading the file
    :param max_lines: if not None, only the first `max_lines` items (after filtering and the
        optional reordering) are kept
    :return: a tuple (X, y) of numpy arrays with the texts and their labels
    """
    # print('reading', path)
    if verbose:
        print(f'loading {path}...', end='')
    frame = pd.read_csv(path, sep='\t')
    if verbose:
        print('[done]')

    texts = frame['text'].values
    labels = frame['continent'].values

    # mask of the items to keep, computed on the unfiltered labels
    keep = labels != 'Antarctica'

    if parse_columns:
        ranks = frame['rank'].values
        # the 'score' column is accessed (so a file without it raises) but its values are not returned
        _ = frame['score'].values
        ranks = ranks[keep]

    texts, labels = texts[keep], labels[keep]

    if parse_columns:
        by_rank = np.argsort(ranks)
        texts, labels = texts[by_rank], labels[by_rank]

    if max_lines is not None:
        texts, labels = texts[:max_lines], labels[:max_lines]

    return texts, labels
|
||||
|
||||
|
||||
class RetrievedSamples(AbstractProtocol):
    """
    Protocol that generates (training sample, test sample) pairs, one per file matching
    'test_rankings_*.txt' in a directory. The training file of a pair is located by replacing
    'test_' with 'training_' in the path of the test file. The classes of each pair are the
    ones found in its training sample.

    :param path_dir: directory in which the test ranking files are searched
    :param load_fn: callable invoked as load_fn(path, parse_columns=True, max_lines=...) that
        returns (X, y)
    :param vectorizer: fitted object whose `transform` is applied to the loaded X
    :param max_train_lines: value of `max_lines` used when loading training files
    :param max_test_lines: value of `max_lines` used when loading test files
    """

    def __init__(self, path_dir: str, load_fn, vectorizer, max_train_lines=None, max_test_lines=None):
        self.path_dir = path_dir
        self.load_fn = load_fn
        self.vectorizer = vectorizer
        self.max_train_lines = max_train_lines
        self.max_test_lines = max_test_lines

    def __call__(self):
        """
        Generates exactly one item per test file: the pair (train_sample, test_sample), or
        (None, None) when the test sample cannot be built with the classes of the training
        sample (LabelledCollection raised ValueError). Consumers are expected to skip the
        (None, None) items.
        """
        for file in glob(join(self.path_dir, 'test_rankings_*.txt')):

            X, y = self.load_fn(file.replace('test_', 'training_'), parse_columns=True, max_lines=self.max_train_lines)
            X = self.vectorizer.transform(X)
            train_sample = LabelledCollection(X, y)

            X, y = self.load_fn(file, parse_columns=True, max_lines=self.max_test_lines)
            # an unexpected size is only reported; the sample is processed anyway
            if len(X)!=qp.environ['SAMPLE_SIZE']:
                print(f'[warning]: file {file} contains {len(X)} instances (expected: {qp.environ["SAMPLE_SIZE"]})')
            # assert len(X) == qp.environ['SAMPLE_SIZE'], f'unexpected sample size for file {file}, found {len(X)}'
            X = self.vectorizer.transform(X)
            try:
                test_sample = LabelledCollection(X, y, classes=train_sample.classes_)
            except ValueError as e:
                print(f'file {file} caused error {e}')
                yield None, None
                # move on to the next file: falling through would yield this training sample
                # together with the test sample of a previous file (or fail with an unbound
                # `test_sample` if the very first file is the one that fails)
                continue

            # print('train #classes:', train_sample.n_classes, train_sample.prevalence())
            # print('test #classes:', test_sample.n_classes, test_sample.prevalence())

            yield train_sample, test_sample
|
||||
|
||||
|
||||
# ---- configuration ----
RANK_AT_K = 100    # max number of lines kept from every test ranking; also quapy's SAMPLE_SIZE
REDUCE_TR = 50000  # when > 0, the big training set is resampled to this many documents
qp.environ['SAMPLE_SIZE'] = RANK_AT_K

data_path = './newCollectionGeo'
train_path = join(data_path, 'train_data_continent.txt')

# ---- big training set: used here to fit the tfidf vectorizer and to report classes/prevalence ----
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=10)

training = LabelledCollection.load(train_path, loader_func=load_txt_sample, verbose=True, parse_columns=False)

if REDUCE_TR>0:
    print('Reducing the number of documents in the training to', REDUCE_TR)
    training = training.sampling(REDUCE_TR)

Xtr, ytr = training.Xy
Xtr = tfidf.fit_transform(Xtr)
print('L orig shape = ', Xtr.shape)

training = LabelledCollection(Xtr, ytr)
classes = training.classes_

print('training classes:', classes)
print('training prevalence:', training.prevalence())

# ---- protocol generating the query-specific (train, test) pairs ----
experiment_prot = RetrievedSamples(
    data_path,
    load_fn=load_txt_sample,
    vectorizer=tfidf,
    max_train_lines=None,
    max_test_lines=RANK_AT_K
)

# ---- evaluation: every method is re-trained on the training sample of each pair ----
for method_name, quantifier in methods():
    print('Starting with method=', method_name)

    errors = []
    pbar = tqdm(experiment_prot(), total=49)
    for train, test in pbar:
        if train is None:
            # the protocol signals an unusable pair with (None, None)
            print('skipping one!')
        else:
            try:
                # print('Training prevalence:', F.strprev(training.prevalence()), 'shape', train.X.shape)
                # print('Test prevalence:', F.strprev(test.prevalence()), 'shape', test.X.shape)

                # print(train.prevalence())
                # print(test.prevalence())
                quantifier.fit(train)
                estim_prev = quantifier.quantify(test.instances)
                errors.append(qp.error.mae(test.prevalence(), estim_prev))
            except Exception as e:
                # best-effort run: a failing query is reported and left out of the average
                print(f'wow, something happened here! skipping; {e}')

        # running mean of the absolute error observed so far for this method
        pbar.set_description(f'mae={np.mean(errors):.4f}')
    print()
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,299 @@
|
|||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV, cross_val_predict
|
||||
from sklearn.base import clone
|
||||
|
||||
import quapy as qp
|
||||
from Retrieval.commons import *
|
||||
from Retrieval.methods import *
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
|
||||
from quapy.data.base import LabelledCollection
|
||||
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
from result_table.src.table import Table
|
||||
|
||||
"""
|
||||
In this sixth experiment, we have a collection C of >6M documents.
|
||||
We split C in two equally-sized pools TrPool, TePool
|
||||
|
||||
I have randomly split the collection in 50% train and 50% test. In each split we have approx. 3.25 million documents.
|
||||
|
||||
We have 5 categories we can evaluate over: Continent, Years_Category, Num_Site_Links, Relative Pageviews and Gender.
|
||||
|
||||
From the training set I have created smaller subsets for each category:
|
||||
100K, 500K, 1M and FULL (3.25M)
|
||||
|
||||
For each category and subset, I have created a training set called: "classifier_training.json". This is the "base" training set for the classifier. In this set we have 500 documents per group in a category. (For example: Male 500, Female 500, Unknown 500). Let me know if you think we need more.
|
||||
|
||||
To "bias" the quantifier towards a query, I have executed the queries (97) on the different training sets and retrieved the 200 most relevant documents per group.
|
||||
For example: (Male 200, Female 200, Unknown 200)
|
||||
Sometimes this is infeasible, we should probably discuss this at some point.
|
||||
|
||||
You can find the results for every query in a file named:
|
||||
|
||||
"training_Query-[QID]Sample-200SPLIT.json"
|
||||
|
||||
Test:
|
||||
To evaluate our approach, I have executed the queries on the test split. You can find the results for all 97 queries up till k=1000 in this file.
|
||||
testRanking_Results.json
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def methods(classifier, class_name=None, binarize=False):
    """
    Lazily generates the (name, method) pairs to be evaluated.

    :param classifier: the classifier handed to every classifier-based method
    :param class_name: name of the class attribute; selects the bandwidth of KDEy-ML
        (0.01 is used for names without a dedicated entry, including None)
    :param binarize: if True, the M3 methods are generated as well. The plain and the '+' entry
        of each M3 model are instantiated in exactly the same way here.
    :return: a generator of tuples (method_name, method)
    """
    bandwidth_by_class = {
        'continent': 0.01,
        'gender': 0.03,
        'years_category':0.03
    }
    bandwidth = bandwidth_by_class.get(class_name, 0.01)

    yield ('NaiveQuery', Naive())
    yield ('CC', ClassifyAndCount(classifier))
    yield ('PACC', PACC(classifier, val_split=5, n_jobs=-1))
    yield ('KDEy-ML', KDEyML(classifier, val_split=5, n_jobs=-1, bandwidth=bandwidth))

    if binarize:
        m3_variants = (
            ('M3b', M3rND_ModelB),
            ('M3b+', M3rND_ModelB),
            ('M3d', M3rND_ModelD),
            ('M3d+', M3rND_ModelD),
        )
        for name, model_class in m3_variants:
            yield (name, model_class(classifier))
|
||||
|
||||
|
||||
def train_classifier_fn(train_path):
    """
    Trains a classifier. To do so, it loads the training set and transforms it into a tfidf
    representation. The classifier is Logistic Regression, with hyperparameters C
    (range [0.0001, 0.001, ..., 10000], i.e., np.logspace(-4, 4, 9)) and class_weight
    (range {'balanced', None}) optimized via 5FCV.

    A confusion matrix is then computed from the predictions that clones of the selected
    classifier produce in a 10-fold cross-validation over the same training set.

    The function reads the module-level names `class_name`, `BINARIZE` and `protected_group`;
    when `BINARIZE` is set, the labels are binarized with `protected_group[class_name]` as the
    positive class.

    :param train_path: path to the training set, loaded via `load_sample`
    :return: a tuple (tfidf, classifier, conf_matrix) with the fitted tfidf-vectorizer, the
        classifier trained, and the cross-validated confusion matrix (rows/columns ordered as
        `classifier.classes_`)
    """
    texts, labels = load_sample(train_path, class_name=class_name)

    if BINARIZE:
        labels = binarize_labels(labels, positive_class=protected_group[class_name])

    tfidf = TfidfVectorizer(sublinear_tf=True, min_df=3)
    Xtr = tfidf.fit_transform(texts)
    print(f'Xtr shape={Xtr.shape}')

    print('training classifier...', end='')
    classifier = LogisticRegression(max_iter=5000)
    modsel = GridSearchCV(
        classifier,
        param_grid={'C': np.logspace(-4, 4, 9), 'class_weight': ['balanced', None]},
        n_jobs=-1,
        cv=5
    )
    modsel.fit(Xtr, labels)
    classifier = modsel.best_estimator_
    classifier_acc = modsel.best_score_
    best_params = modsel.best_params_
    print(f'[done] best-params={best_params} got {classifier_acc:.4f} score')

    print('generating cross-val predictions for M3')
    # a clone is used so that the selected classifier itself is not modified by this step
    predictions = cross_val_predict(clone(classifier), Xtr, labels, cv=10, n_jobs=-1, verbose=10)
    conf_matrix = confusion_matrix(labels, predictions, labels=classifier.classes_)

    # built only to report the classes and the prevalence of the training set
    training = LabelledCollection(Xtr, labels)
    print('training classes:', training.classes_)
    print('training prevalence:', training.prevalence())

    return tfidf, classifier, conf_matrix
|
||||
|
||||
|
||||
def reduceAtK(data: LabelledCollection, k):
    """
    Keeps only the first k items of a collection.

    :param data: the LabelledCollection to cut
    :param k: number of leading items to keep; no warning is issued if k exceeds len(data)
    :return: a new LabelledCollection with the slices [:k] of the instances and labels of
        `data`, declared over the same classes as `data`
    """
    # if k > len(data):
    #     print(f'[warning] {k=}>{len(data)=}')
    instances, labels = data.Xy
    return LabelledCollection(instances[:k], labels[:k], classes=data.classes_)
|
||||
|
||||
|
||||
def benchmark_name(class_name, k=None):
    """
    Composes the name of a benchmark (a row of the result tables).

    :param class_name: name of the class attribute; every underscore is preceded by a backslash
        in the result (e.g., 'years_category' becomes 'years\\_category')
    :param k: optional suffix (e.g., a rank cut-off or a data size)
    :return: the escaped class name if k is None, or '<escaped class name>@<k>' otherwise
    """
    # a raw string is needed here: '\_' is not a valid escape sequence in a regular string
    # literal and triggers a warning in recent Python versions
    scape_class_name = class_name.replace('_', r'\_')
    if k is None:
        return scape_class_name
    return f'{scape_class_name}@{k}'
|
||||
|
||||
|
||||
def run_experiment():
    """
    Runs the method currently selected by the main loop on every query of the protocol.

    The function takes no arguments; it reads the module-level names `experiment_prot`,
    `classifier`, `method`, `method_name`, `conf_matrix`, `Ks` and `BINARIZE`.

    For each query and each cut-off k in Ks, the prevalence of the top-k test documents is
    estimated and compared against the true one (mae, mrae). The per-k divergences from the
    query prevalence `q_rel_prevs` (rKL, and rND when BINARIZE is set) are combined across the
    cut-offs with weights 1/log2(k), normalized by their sum, for both the estimated and the true
    prevalence values; the absolute difference between both aggregates is the error recorded for
    the query.

    :return: a dict with keys 'mae' and 'mrae' (each a dict k -> list with one value per query),
        and 'rKL_error' and 'rND_error' (lists with one value per query; the latter is only
        filled when BINARIZE is set)
    """

    def aggregate(rMs, Ks, Z=1):
        # weighted sum of the per-cut-off values, with weights 1/log2(k), divided by Z
        return (1 / Z) * sum((1. / np.log2(k)) * v for v, k in zip(rMs, Ks))

    results = {
        'mae': {k: [] for k in Ks},
        'mrae': {k: [] for k in Ks},
        'rKL_error': [],
        'rND_error': []
    }

    pbar = tqdm(experiment_prot(), total=experiment_prot.total())
    for train, test, q_rel_prevs in pbar:
        # each of train/test is a triple; its third element is not used here
        Xtr, ytr, _ = train
        Xte, yte, _ = test

        train_col = LabelledCollection(Xtr, ytr, classes=classifier.classes_)

        is_naive = method_name.startswith('Naive')
        is_m3 = method_name.startswith('M3')
        if not is_naive and not is_m3:
            # the classifier is kept as is; the query-specific sample acts as validation data
            method.fit(train_col, val_split=train_col, fit_classifier=False)
        elif method_name == 'Naive':
            method.fit(train_col)

        test_col = LabelledCollection(Xte, yte, classes=classifier.classes_)
        rKL_estim, rKL_true = [], []
        rND_estim, rND_true = [], []
        for k in Ks:
            test_k = reduceAtK(test_col, k)
            if method_name == 'NaiveQuery':
                # NaiveQuery is re-fit at every cut-off on the top-k training documents
                method.fit(reduceAtK(train_col, k))

            estim_prev = method.quantify(test_k.instances)

            # epsilon value for prevalence smoothing
            eps=(1. / (2. * k))

            # error metrics
            test_k_prev = test_k.prevalence()
            mae = qp.error.mae(test_k_prev, estim_prev)
            mrae = qp.error.mrae(test_k_prev, estim_prev, eps=eps)
            rKL_at_k_estim = qp.error.kld(estim_prev, q_rel_prevs, eps=eps)
            rKL_at_k_true = qp.error.kld(test_k_prev, q_rel_prevs, eps=eps)

            if BINARIZE:
                # [1] is the index of the minority or historically disadvantaged group
                rND_estim.append(np.abs(estim_prev[1] - q_rel_prevs[1]))
                rND_true.append(np.abs(test_k_prev[1] - q_rel_prevs[1]))

            # collect results
            results['mae'][k].append(mae)
            results['mrae'][k].append(mrae)
            rKL_estim.append(rKL_at_k_estim)
            rKL_true.append(rKL_at_k_true)

        # aggregate fairness metrics
        Z = sum((1. / np.log2(k)) for k in Ks)
        rKL_estim = aggregate(rKL_estim, Ks, Z)
        rKL_true = aggregate(rKL_true, Ks, Z)
        results['rKL_error'].append(np.abs(rKL_true-rKL_estim))

        if BINARIZE:
            rND_estim = aggregate(rND_estim, Ks, Z)
            rND_true = aggregate(rND_true, Ks, Z)

            if isinstance(method, AbstractM3rND):
                if method_name.endswith('+'):
                    # learns the correction parameters from the query-specific training data
                    conf_matrix_ = method.get_confusion_matrix(*train_col.Xy)
                else:
                    # learns the correction parameters from the training data used to train the classifier
                    conf_matrix_ = conf_matrix.copy()
                rND_estim = method.fair_measure_correction(rND_estim, conf_matrix_)

            results['rND_error'].append(np.abs(rND_true - rND_estim))

        pbar.set_description(f'{method_name}')

    return results
|
||||
|
||||
|
||||
data_home = 'data'

if __name__ == '__main__':

    # NOTE(review): the nesting of this block was reconstructed from a listing that had lost its
    # indentation; in particular, confirm the level at which the per-class and the global PDFs
    # are generated.

    # final tables only contain the information for the data size 10K, each row is a class name and each column
    # the corresponding rND (for binary) or rKL (for multiclass) score
    tables_RND, tables_DKL = [], []
    tables_final = []
    for class_mode in ['multiclass', 'binary']:
        # BINARIZE, class_name, classifier, conf_matrix, experiment_prot, method_name and method
        # are module-level names that train_classifier_fn and run_experiment read
        BINARIZE = (class_mode=='binary')
        method_names = [name for name, *other in methods(None, binarize=BINARIZE)]

        table_final = Table(
            name='rND' if BINARIZE else 'rKL',
            benchmarks=[benchmark_name(c) for c in CLASS_NAMES],
            methods=method_names
        )
        table_final.format.mean_macro = False
        tables_final.append(table_final)

        for class_name in CLASS_NAMES:
            tables_mae, tables_mrae = [], []

            benchmarks_size =[benchmark_name(class_name, s) for s in DATA_SIZES]
            table_DKL = Table(name=f'rKL-{class_name}', benchmarks=benchmarks_size, methods=method_names)
            table_RND = Table(name=f'rND-{class_name}', benchmarks=benchmarks_size, methods=method_names)

            for data_size in DATA_SIZES:
                print(class_name, class_mode, data_size)
                benchmarks_k = [benchmark_name(class_name, k) for k in Ks]
                # table_mae = Table(name=f'{class_name}-{data_size}-mae', benchmarks=benchmarks_k, methods=method_names)
                table_mrae = Table(name=f'{class_name}-{data_size}-mrae', benchmarks=benchmarks_k, methods=method_names)

                # tables_mae.append(table_mae)
                tables_mrae.append(table_mrae)

                # sets all paths
                class_home = join(data_home, class_name, data_size)
                train_data_path = join(data_home, class_name, 'FULL', 'classifier_training.json')  # <----- fixed classifier
                classifier_path = join('classifiers', 'FULL', f'classifier_{class_name}_{class_mode}.pkl')
                test_rankings_path = join(data_home, 'testRanking_Results.json')
                test_query_prevs_path = join(data_home, 'prevelance_vectors_judged_docs.json')
                results_home = join('results', class_name, class_mode, data_size)
                positive_class = protected_group[class_name] if BINARIZE else None

                # instantiates the classifier (trains it the first time, loads it in the subsequent executions)
                tfidf, classifier, conf_matrix = qp.util.pickled_resource(
                    classifier_path, train_classifier_fn, train_data_path
                )

                experiment_prot = RetrievedSamples(
                    class_home,
                    test_rankings_path,
                    test_query_prevs_path,
                    vectorizer=tfidf,
                    class_name=class_name,
                    positive_class=positive_class,
                    classes=classifier.classes_
                )

                for method_name, method in methods(classifier, class_name, BINARIZE):

                    results_path = join(results_home, method_name + '.pkl')
                    results = qp.util.pickled_resource(results_path, run_experiment)

                    # compose the tables
                    for k in Ks:
                        # table_mae.add(benchmark=benchmark_name(class_name, k), method=method_name, v=results['mae'][k])
                        table_mrae.add(benchmark=benchmark_name(class_name, k), method=method_name, v=results['mrae'][k])
                    table_DKL.add(benchmark=benchmark_name(class_name, data_size), method=method_name, v=results['rKL_error'])
                    if BINARIZE:
                        table_RND.add(benchmark=benchmark_name(class_name, data_size), method=method_name, v=results['rND_error'])

                    if data_size=='10K':
                        value = results['rND_error'] if BINARIZE else results['rKL_error']
                        table_final.add(benchmark=benchmark_name(class_name), method=method_name, v=value)

            # one PDF per class: the fairness table (rND in binary mode, rKL otherwise) plus the mrae tables
            tables = ([table_RND] + tables_mrae) if BINARIZE else ([table_DKL] + tables_mrae)
            Table.LatexPDF(f'./latex/{class_mode}/{class_name}.pdf', tables=tables)

            if BINARIZE:
                tables_RND.append(table_RND)
            else:
                tables_DKL.append(table_DKL)

    Table.LatexPDF(f'./latex/global/main.pdf', tables=tables_RND+tables_DKL, dedicated_pages=False)
    Table.LatexPDF(f'./latex/final/main.pdf', tables=tables_final, dedicated_pages=False)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
import os.path
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import LinearSVC
|
||||
|
||||
import quapy as qp
|
||||
from Retrieval.commons import RetrievedSamples, load_sample
|
||||
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation as Naive
|
||||
from quapy.method.aggregative import ClassifyAndCount, EMQ, ACC, PCC, PACC, KDEyML
|
||||
from quapy.data.base import LabelledCollection
|
||||
from experiments import benchmark_name, reduceAtK, run_experiment
|
||||
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
|
||||
from result_table.src.table import Table
|
||||
|
||||
|
||||
|
||||
def methods(classifier):
    """
    Generates the quantifiers compared in this model-selection experiment:
    ten KDEyML instances whose bandwidths are evenly spaced between 0.01 and 0.1.

    :param classifier: the classifier handed over to every KDEyML instance
    :return: a generator of (method_name, quantifier) tuples, named 'KDE00' to 'KDE09'
    """
    bandwidths = np.linspace(0.01, 0.1, 10)
    for position in range(len(bandwidths)):
        # two-digit, zero-padded suffix so that the names sort in bandwidth order
        name = f'KDE{position:02d}'
        quantifier = KDEyML(classifier, val_split=5, n_jobs=-1, bandwidth=bandwidths[position])
        yield (name, quantifier)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Model selection for KDEy: for every class, collects the MRAE obtained by each
    # bandwidth candidate at every cut-off k, and renders one LaTeX/PDF table per class.
    data_home = 'data-modsel'

    Ks = [5, 10, 25, 50, 75, 100, 250, 500, 750, 1000]

    # only the method names are needed here, hence no classifier is passed
    method_names = [m for m, *_ in methods(None)]

    class_mode = 'multiclass'

    dir_names = {
        'gender': '100K_GENDER_TREC21_QUERIES/100K-NEW-QUERIES',
        'continent': '100K_CONT_TREC21_QUERIES/100K-NEW-QUERIES',
        'years_category': '100K_YEARS_TREC21_QUERIES/100K-NEW-QUERIES'
    }

    for class_name in ['gender', 'continent', 'years_category']:

        tables_mrae = []

        benchmarks = [benchmark_name(class_name, k) for k in Ks]

        for data_size in ['100K']:

            table_mrae = Table(name=f'{class_name}-{data_size}-mrae', benchmarks=benchmarks, methods=method_names)
            tables_mrae.append(table_mrae)

            class_home = join(data_home, dir_names[class_name])
            classifier_path = join('classifiers', 'FULL', f'classifier_{class_name}_{class_mode}.pkl')
            test_rankings_path = join(data_home, 'testRanking-TREC21-Queries_Results.json')
            test_query_prevs_path = join('data', 'prevelance_vectors_judged_docs.json')
            results_home = join('results', 'modsel', class_name, data_size)

            # the file is closed as soon as the objects are loaded
            # NOTE: unpickling can execute arbitrary code; only load classifier files produced locally
            with open(classifier_path, 'rb') as classifier_file:
                tfidf, classifier, conf_matrix = pickle.load(classifier_file)

            # NOTE(review): experiment_prot and quantifier are not passed to run_experiment
            # in this block; confirm how run_experiment gets access to them
            experiment_prot = RetrievedSamples(
                class_home,
                test_rankings_path,
                test_query_prevs_path,
                vectorizer=tfidf,
                class_name=class_name,
                classes=classifier.classes_
            )

            for method_name, quantifier in methods(classifier):

                results_path = join(results_home, method_name + '.pkl')
                results = qp.util.pickled_resource(results_path, run_experiment)

                for k in Ks:
                    table_mrae.add(benchmark=benchmark_name(class_name, k), method=method_name, v=results['mrae'][k])

        Table.LatexPDF(f'./latex/modsel/{class_name}.pdf', tables=tables_mrae)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
"""
|
||||
This file implements some of the methods presented in the FAccT'22 paper by
|
||||
Ghazimatin, Kleindessner, Russell, Abedjan, and Golebiowski,
|
||||
Measuring Fairness of Rankings under Noisy Sensitive Information.
|
||||
|
||||
In particular, it implements two variants of a method relying on M3=rND:
|
||||
one in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|A) (called "b")
|
||||
and another in which the assumed graphical model is P(Â,A,S) = P(Â|A)*P(S|Â) (called "d")
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from abc import ABC, abstractmethod
|
||||
from sklearn.metrics import confusion_matrix
|
||||
|
||||
from quapy.method.aggregative import CC
|
||||
|
||||
|
||||
class AbstractM3rND(ABC):
    """
    Common scaffolding for the M3=rND correction methods. The classifier is wrapped
    in a CC quantifier, which is used both to obtain the proxy labels and to
    estimate the prevalence values; subclasses only define how the estimated
    rND is corrected given a confusion matrix.
    """

    def __init__(self, classifier):
        self.quantifier = CC(classifier)

    def proxy_labels(self, instances):
        """Returns the labels that the underlying quantifier's classifier assigns to `instances`."""
        return self.quantifier.classify(instances)

    def quantify(self, instances):
        """Returns the prevalence estimates the underlying quantifier issues for `instances`."""
        return self.quantifier.quantify(instances)

    @abstractmethod
    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        ...

    def get_confusion_matrix(self, X, y, additive_smoothing=0.5):
        """
        Computes the confusion matrix between the true labels and the proxy labels.
        Cells with value 0 make the corrections unstable, so a constant can be added
        to every cell (by default, half a count).

        :param X: array-like with the covariates
        :param y: array-like with the true labels
        :param additive_smoothing: float, default 0.5; values not greater than 0 disable the smoothing
        :return: the confusion matrix (rows: true labels, columns: proxy labels, both following
            the order of the quantifier's `classes_`) as returned by sklearn's `confusion_matrix`;
            no normalization is requested here, so the cells are not probabilities
        """
        predicted = self.proxy_labels(X)
        matrix = confusion_matrix(y, predicted, labels=self.quantifier.classes_)
        if additive_smoothing > 0:
            matrix = matrix.astype(float) + additive_smoothing
        return matrix
|
||||
|
||||
|
||||
class M3rND_ModelB(AbstractM3rND):
    """
    Variant "b" of the M3=rND method: the estimated rND is rescaled by 1/(1-p-q),
    where p and q are the two misclassification rates conditioned on the true label.
    """

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        """
        Corrects the estimated rND using the confusion matrix.

        :param rND_estim: float, the rND value computed on the proxy labels
        :param conf_matrix: 2x2 array indexed as [true label, proxy label]
        :return: the corrected rND, or `rND_estim` unchanged if 1-p-q is exactly 0
        """
        # normalizing each row yields P(Ŷ=j|Y=i), i.e., the rates conditioned on the true labels
        row_totals = conf_matrix.sum(axis=1, keepdims=True)
        cond_on_true = conf_matrix / row_totals
        p_hat1_given0 = cond_on_true[0, 1]  # P(hat{A}=1|A=0)
        p_hat0_given1 = cond_on_true[1, 0]  # P(hat{A}=0|A=1)
        denominator = 1 - p_hat1_given0 - p_hat0_given1
        if denominator == 0:
            # the correction factor is undefined; the estimate is returned as is
            return rND_estim
        return rND_estim * (1. / denominator)
|
||||
|
||||
|
||||
class M3rND_ModelD(AbstractM3rND):
    """
    Variant "d" of the M3=rND method: the estimated rND is rescaled by a factor that
    depends on the misclassification rates p and q and on the prevalence beta of A=1.
    """

    def __init__(self, classifier):
        super().__init__(classifier)

    def fair_measure_correction(self, rND_estim: float, conf_matrix: np.ndarray):
        """
        Corrects the estimated rND using the confusion matrix.

        :param rND_estim: float, the rND value computed on the proxy labels
        :param conf_matrix: 2x2 array indexed as [true label, proxy label]; it may contain
            either counts (possibly smoothed) or joint probabilities, since it is
            normalized internally
        :return: the corrected rND, or `rND_estim` unchanged if the correction is undefined
        """
        row_totals = conf_matrix.sum(axis=1)
        # truecond_matrix contains values Cij=P(Ŷ=j|Y=i) (truecond stands for "conditioned on true labels")
        truecond_matrix = conf_matrix / row_totals.reshape(-1, 1)
        # the marginal of the true labels must be a probability: dividing by the total makes
        # beta a prevalence also when the matrix holds counts (for a matrix that already
        # sums up to 1 the division changes nothing)
        prev_A = row_totals / conf_matrix.sum()
        beta = prev_A[1]  # P(A=1)
        p = truecond_matrix[0, 1]  # P(hat{A}=1|A=0)
        q = truecond_matrix[1, 0]  # P(hat{A}=0|A=1)
        x = (1 - q) * beta + p * (1 - beta)
        y = q * beta + (1 - p) * (1 - beta)
        if x != 0 and y != 0:
            corr = ((((1 - q) * beta) / x) - (q * beta / y))
            rND_estim = rND_estim * corr
        return rND_estim
|
||||
|
||||
|
|
@ -0,0 +1,124 @@
|
|||
import itertools
|
||||
import os.path
|
||||
import pickle
|
||||
import numpy as np
|
||||
from Retrieval.experiments import methods
|
||||
from Retrieval.commons import CLASS_NAMES, Ks, DATA_SIZES
|
||||
from os.path import join
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
|
||||
# location of the data and classification mode (the latter is part of the results path)
data_home = 'data'
class_mode = 'multiclass'

# names of the methods to plot; only the first element of each tuple yielded by methods() is kept
method_names = [label for label, *_ in methods(None, 'continent')]

# filled in by load_all_results()
all_results = dict()

# human-readable names of the classes, used in the plot titles
class_name_label = dict(
    continent='Geographic Location',
    gender='Gender',
    years_category='Age of Topic',
)
|
||||
|
||||
|
||||
# loads all MRAE results, and returns a dictionary containing the values, which is indexed by:
|
||||
# class_name -> data_size -> method_name -> k -> stat -> float
|
||||
# where stat is "mean", "std", "max"
|
||||
def load_all_results():
    """
    Loads the pickled results of every method and summarizes their MRAE values.

    The module-level dictionary `all_results` is filled in and returned; it is indexed by
    class_name -> data_size -> method_name -> k -> stat -> float,
    where stat is one of "mean", "std", "max".

    A method whose result file cannot be loaded is reported and left out of the
    dictionary (instead of silently reusing the results of the previous method).

    :return: the dictionary `all_results`
    """
    for class_name in CLASS_NAMES:
        all_results[class_name] = {}

        for data_size in DATA_SIZES:
            all_results[class_name][data_size] = {}

            results_home = join('results', class_name, class_mode, data_size)

            for method_name in method_names:
                results_path = join(results_home, method_name + '.pkl')
                try:
                    with open(results_path, 'rb') as results_file:
                        results = pickle.load(results_file)
                except Exception as e:
                    # report the path that failed and skip this method
                    print(f'missing result {results_path}', e)
                    continue

                values = results['mrae']
                all_results[class_name][data_size][method_name] = {
                    k: {
                        'mean': np.mean(values[k]),
                        'std': np.std(values[k]),
                        'max': np.max(values[k]),
                    }
                    for k in Ks
                }

    return all_results
|
||||
|
||||
|
||||
# NOTE(review): these statements are not guarded by `if __name__ == '__main__'`, so
# importing this module also loads the results and generates the plots
results = load_all_results()

# generates the class-independent, size-independent plots for y-axis=MRAE in which:
# - the x-axis displays the Ks

for class_name in CLASS_NAMES:
    for data_size in DATA_SIZES[:1]:

        # the plots for gender use a logarithmic y-axis
        log = class_name == 'gender'

        fig, ax = plt.subplots()
        if log:
            ax.set_yscale('log')

        max_means = []
        markers = itertools.cycle(['o', 's', '^', 'D', 'v', '*', '+'])
        for method_name in method_names:
            # class_name -> data_size -> method_name -> k -> stat -> float
            method_results = results[class_name][data_size][method_name]
            means = [method_results[k]['mean'] for k in Ks]
            max_means.append(max(means))
            means = np.asarray(means)

            method_name = method_name.replace('NaiveQuery', 'Naive@$k$')
            method_name = method_name.replace('KDEy-ML', 'KDEy')
            marker = next(markers)
            # the marker is given only via the keyword: also encoding one in the format
            # string ('o-') makes matplotlib warn about a redundant definition
            ax.plot(Ks, means, '-', label=method_name, color=None, linewidth=3, markersize=10, marker=marker)

        ax.grid(True, which='both', axis='y', color='gray', linestyle='--', linewidth=0.3)
        ax.set_xlabel('k')
        ax.set_ylabel('RAE' + (' (log scale)' if log else ''))
        # raw strings keep the LaTeX backslashes intact; the label is derived from the
        # data size being plotted (e.g., '10K' is rendered as L_{10K})
        size_tex = data_size.replace('FULL', '3.25M').replace('K', r'\mathrm{K}').replace('M', r'\mathrm{M}')
        data_size_label = r'$\mathcal{L}_{' + size_tex + '}$'
        ax.set_title(f'{class_name_label[class_name]} from {data_size_label}')
        y_top = max(max_means)*1.05
        if log:
            # a lower limit of 0 is not valid on a logarithmic axis, so only the top is set
            ax.set_ylim(top=y_top)
        else:
            ax.set_ylim([0, y_top])

        if class_name == 'years_category':
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        os.makedirs(f'plots/var_k/{class_name}', exist_ok=True)
        plotpath = f'plots/var_k/{class_name}/{data_size}_mrae.pdf'
        print(f'saving plot in {plotpath}')
        plt.savefig(plotpath, bbox_inches='tight')
        # release the figure; otherwise every iteration leaves an open figure behind
        plt.close(fig)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
import itertools
|
||||
import os.path
|
||||
from Retrieval.experiments import methods
|
||||
from Retrieval.commons import CLASS_NAMES, Ks, DATA_SIZES
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from Retrieval.plot_mrae_xaxis_k import load_all_results
|
||||
|
||||
data_home = 'data'
class_mode = 'multiclass'

# NOTE(review): the sibling plotting script calls methods(None, 'continent'); confirm
# that methods() can be called without the class name here
method_names = [name for name, *other in methods(None)]

all_results = {}

class_name_label = {
    'continent': 'Geographic Location',
    'gender': 'Gender',
    'years_category': 'Age of Topic'
}

# loads all MRAE results, and returns a dictionary containing the values, which is indexed by:
# class_name -> data_size -> method_name -> k -> stat -> float
results = load_all_results()

# generates the class-dependent plots for y-axis=MRAE in which:
# - the x-axis displays the training pool sizes

# X_DATA_SIZES = [int(x.replace('K', '000').replace('M', '000000').replace('FULL', '3250000')) for x in DATA_SIZES]
X_DATA_SIZES = [x.replace('FULL', '3.25M') for x in DATA_SIZES]

for class_name in CLASS_NAMES:
    for k in [100]:  # Ks:

        # the plots for gender use a logarithmic y-axis
        log = class_name == 'gender'

        fig, ax = plt.subplots()
        if log:
            ax.set_yscale('log')

        max_means = []
        markers = itertools.cycle(['o', 's', '^', 'D', 'v', '*', '+'])
        for method_name in method_names:
            # class_name -> data_size -> method_name -> k -> stat -> float
            means = [
                results[class_name][data_size][method_name][k]['mean'] for data_size in DATA_SIZES
            ]
            max_means.append(max(means))

            # CC is drawn dashed, all other methods with a solid line; the marker is given
            # only via the keyword, since also encoding one in the format string ('o-')
            # makes matplotlib warn about a redundant definition
            style = '-' if method_name != 'CC' else '--'
            method_name = method_name.replace('NaiveQuery', 'Naive@$k$')
            method_name = method_name.replace('KDEy-ML', 'KDEy')
            marker = next(markers)
            ax.plot(X_DATA_SIZES, means, style, label=method_name, color=None, linewidth=3, markersize=10, marker=marker)

        ax.grid(True, which='both', axis='y', color='gray', linestyle='--', linewidth=0.3)
        ax.set_xlabel('training pool size')
        ax.set_ylabel('RAE' + (' (log scale)' if log else ''))
        ax.set_title(f'{class_name_label[class_name]} at exposure {k=}')
        y_top = max(max_means)*1.05
        if log:
            # a lower limit of 0 is not valid on a logarithmic axis, so only the top is set
            ax.set_ylim(top=y_top)
        else:
            ax.set_ylim([0, y_top])

        if class_name == 'years_category':
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        os.makedirs(f'plots/var_size/{class_name}', exist_ok=True)
        plotpath = f'plots/var_size/{class_name}/{k}_mrae.pdf'
        print(f'saving plot in {plotpath}')
        plt.savefig(plotpath, bbox_inches='tight')
        # release the figure; otherwise every iteration leaves an open figure behind
        plt.close(fig)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
import os.path
|
||||
import pickle
|
||||
from itertools import zip_longest
|
||||
from commons import RetrievedSamples, load_sample, DATA_SIZES
|
||||
from os.path import join
|
||||
from tqdm import tqdm
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
"""
|
||||
Plots the distribution of (predicted) relevance score for the test samples and for the training samples wrt:
|
||||
- training pool size (10K, 50K, 100K, 500K, 1M, FULL)
|
||||
- rank
|
||||
"""
|
||||
|
||||
|
||||
data_home = 'data'
|
||||
|
||||
up_to = 250
|
||||
|
||||
for class_name in ['continent']: # 'num_sitelinks_category', 'relative_pageviews_category', 'years_category', 'continent', 'gender']:
|
||||
test_added = False
|
||||
Mtrs, Mtes, source = [], [], []
|
||||
for data_size in DATA_SIZES:
|
||||
|
||||
class_home = join(data_home, class_name, data_size)
|
||||
classifier_path = join('classifiers', 'FULL', f'classifier_{class_name}.pkl')
|
||||
test_rankings_path = join(data_home, 'testRanking_Results.json')
|
||||
test_query_prevs_path = join(data_home, 'prevelance_vectors_judged_docs.json')
|
||||
|
||||
_, classifier = pickle.load(open(classifier_path, 'rb'))
|
||||
|
||||
experiment_prot = RetrievedSamples(
|
||||
class_home,
|
||||
test_rankings_path,
|
||||
test_query_prevs_path,
|
||||
vectorizer=None,
|
||||
class_name=class_name,
|
||||
classes=classifier.classes_
|
||||
)
|
||||
|
||||
Mtr = []
|
||||
Mte = []
|
||||
pbar = tqdm(experiment_prot(), total=experiment_prot.total())
|
||||
for train, test, *_ in pbar:
|
||||
Xtr, ytr, score_tr = train
|
||||
Xte, yte, score_te = test
|
||||
if len(score_tr) >= up_to:
|
||||
Mtr.append(score_tr)
|
||||
Mte.append(score_te)
|
||||
|
||||
Mtrs.append(Mtr)
|
||||
if not test_added:
|
||||
Mtes.append(Mte)
|
||||
test_added = True
|
||||
source.append(data_size)
|
||||
|
||||
fig, ax = plt.subplots()
|
||||
# train_source = ['train-'+s for s in source]
|
||||
# legend labels in LaTeX; raw strings are used because '\m' is not a valid escape
# sequence in a regular string literal (the resulting text is unchanged)
train_source = ['$L_{' + s.replace('FULL', '3.25M').replace('K', r'\mathrm{K}').replace('M', r'\mathrm{M}') + '}$' for s in source]
# Ms = list(zip(Mtrs, train_source))+list(zip(Mtes, ['test']))
Ms = list(zip(Mtrs, train_source)) + list(zip(Mtes, [r'$U_{(3.25\mathrm{M})}$']))
|
||||
|
||||
|
||||
for M, source in Ms:
|
||||
M = np.asarray(list(zip_longest(*M, fillvalue=np.nan))).T
|
||||
|
||||
num_rep, num_docs = M.shape
|
||||
|
||||
mean_values = np.nanmean(M, axis=0)
|
||||
n_filled = np.count_nonzero(~np.isnan(M), axis=0)
|
||||
std_errors = np.nanstd(M, axis=0) / np.sqrt(n_filled)
|
||||
|
||||
line = ax.plot(range(num_docs), mean_values, '-', label=source, color=None)
|
||||
color = line[-1].get_color()
|
||||
ax.fill_between(range(num_docs), mean_values - std_errors, mean_values + std_errors, alpha=0.3, color=color)
|
||||
|
||||
|
||||
ax.set_xlabel('rank ($k$)')
|
||||
ax.set_ylabel('predicted relevance score')
|
||||
ax.set_title(class_name.replace('continent', 'Geographic Location'))
|
||||
ax.set_xlim((0,up_to))
|
||||
|
||||
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
|
||||
|
||||
# plt.show()
|
||||
os.makedirs('plots', exist_ok=True)
|
||||
plotpath = f'plots/{class_name}_rel_distrbution_2.pdf'
|
||||
print(f'saving plot in {plotpath}')
|
||||
plt.savefig(plotpath, bbox_inches='tight')
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
import pandas as pd
|
||||
|
||||
from os.path import join
|
||||
|
||||
from quapy.data import LabelledCollection
|
||||
|
||||
# quick inspection script: prints the whole content of the prevalence-vectors JSON file
data_home = 'data'
CLASS_NAME = 'continent'
datasize = '100K'

# show every column when the data frame is printed
pd.set_option('display.max_columns', None)

file_path = join(data_home, 'prevelance_vectors_judged_docs.json')
df = pd.read_json(file_path)
print(df)
|
||||
|
Before Width: | Height: | Size: 128 KiB After Width: | Height: | Size: 128 KiB |
113
TODO.txt
|
|
@ -1,26 +1,95 @@
|
|||
Solve the warnings issue; right now there is a warning ignore in method/__init__.py:
|
||||
ensembles seem to be broken; they have an internal model selection which takes the parameters, but since quapy now
|
||||
works with protocols it would need to know the validation set in order to pass something like
|
||||
"protocol: APP(val, etc.)"
|
||||
sample_size should not be mandatory when qp.environ['SAMPLE_SIZE'] has been specified
|
||||
clean all the cumbersome methods that have to be implemented for new quantifiers (e.g., n_classes_ prop, etc.)
|
||||
make truly parallel the GridSearchQ
|
||||
make more examples in the "examples" directory
|
||||
merge with master, because I had to fix some problems with QuaNet due to an issue notified via GitHub!
|
||||
added cross_val_predict in qp.model_selection (i.e., a cross_val_predict for quantification) --would be nice to have
|
||||
it parallelized
|
||||
|
||||
Add 'platt' to calib options in EMQ?
|
||||
check the OneVsAll module(s)
|
||||
|
||||
Allow n_prevpoints in APP to be specified by a user-defined grid?
|
||||
check the set_params of neural.py, because the separation of estimator__<param> is not implemented; see also
|
||||
__check_params_colision
|
||||
|
||||
HDy can be customized so that the number of bins is specified, instead of explored within the fit method
|
||||
|
||||
Packaging:
|
||||
==========================================
|
||||
Document methods with paper references
|
||||
unit-tests
|
||||
clean wiki_examples!
|
||||
|
||||
Refactor:
|
||||
==========================================
|
||||
Unify ThresholdOptimization methods, as an extension of PACC (and not ACC), the fit methods are almost identical and
|
||||
use a prob classifier (take into account that PACC uses pcc internally, whereas the threshold methods use cc
|
||||
instead). The fit method of ACC and PACC has a block for estimating the validation estimates that should be unified
|
||||
as well...
|
||||
Refactor protocols. APP and NPP related functionalities are duplicated in functional, LabelledCollection, and evaluation
|
||||
|
||||
|
||||
New features:
|
||||
==========================================
|
||||
Add "measures for evaluating ordinal"?
|
||||
Add datasets for topic.
|
||||
Do we want to cover cross-lingual quantification natively in QuaPy, or does it make more sense as an application on top?
|
||||
|
||||
Current issues:
|
||||
==========================================
|
||||
Revise the class structure of quantification methods and the methods they inherit... There is some confusion regarding
|
||||
methods isbinary, isprobabilistic, and the like. The attribute "learner_" in aggregative quantifiers is also
|
||||
confusing, since there is a getter and a setter.
|
||||
Remove the "deep" in get_params. There is no real compatibility with scikit-learn as for now.
|
||||
SVMperf-based learners do not remove temp files in __del__?
|
||||
In binary quantification (hp, kindle, imdb) we used F1 in the minority class (which in kindle and hp happens to be the
|
||||
negative class). This is not covered in this new implementation, in which the binary case is not treated as such, but as
|
||||
an instance of single-label with 2 labels. Check
|
||||
Add automatic reindex of class labels in LabelledCollection (currently, class indexes should be ordered and with no gaps)
|
||||
OVR I believe is currently tied to aggregative methods. We should provide a general interface also for general quantifiers
|
||||
Currently, being "binary" only adds one checker; we should figure out how to impose the check to be automatically performed
|
||||
Add random seed management to support replicability (see temp_seed in util.py).
|
||||
GridSearchQ is not truly parallelized. It only parallelizes on the predictions.
|
||||
In the context of a quantifier (e.g., QuaNet or CC), the parameters of the learner should be prefixed with "estimator__",
|
||||
in QuaNet this is resolved with a __check_params_colision, but this should be improved. It might be cumbersome to
|
||||
impose the "estimator__" prefix for, e.g., quantifiers like CC though... This should be changed everywhere...
|
||||
QuaNet needs refactoring. The base quantifiers ACC and PACC receive val_data with instances already transformed. This
|
||||
issue is due to a bad design.
|
||||
|
||||
Improvements:
|
||||
==========================================
|
||||
Explore the hyperparameter "number of bins" in HDy
|
||||
Rename EMQ to SLD ?
|
||||
Parallelize the kFCV in ACC and PACC?
|
||||
Parallelize model selection trainings
|
||||
We might want to think of (improving and) adding the class Tabular (it is defined and used on branch tweetsent). A more
|
||||
recent version is in the project ql4facct. This class is meant to generate latex tables from results (highlighting
|
||||
best results, computing statistical tests, colouring cells, producing rankings, producing averages, etc.). Trying
|
||||
to generate tables is typically a bad idea, but in this specific case we do have pretty good control of what an
|
||||
experiment looks like. (Do we want to abstract experimental results? this could be useful not only for tables but
|
||||
also for plots).
|
||||
Add proper logging system. Currently we use print
|
||||
It might be good to simplify the number of methods that have to be implemented for any new Quantifier. At the moment,
|
||||
there are many functions like get_params, set_params, and, specially, @property classes_, which are cumbersome to
|
||||
implement for quick experiments. A possible solution is to impose get_params and set_params only in cases in which
|
||||
the model extends some "ModelSelectable" interface only. The classes_ should have a default implementation.
|
||||
|
||||
Checks:
|
||||
==========================================
|
||||
How many times is the system of equations for ACC and PACC not solved? How many times is it clipped? Do they sum up
|
||||
to one always?
|
||||
Re-check how hyperparameters from the quantifier and hyperparameters from the classifier (in aggregative quantifiers)
|
||||
are handled. In scikit-learn the hyperparameters from a wrapper method are indicated directly whereas the hyperparams
|
||||
from the internal learner are prefixed with "estimator__". In QuaPy, combinations having to do with the classifier
|
||||
can be computed at the beginning, and then in an internal loop the hyperparams of the quantifier can be explored,
|
||||
passing fit_learner=False.
|
||||
Re-check Ensembles. As for now, they are strongly tied to aggregative quantifiers.
|
||||
Re-think the environment variables. Maybe add new ones (like, for example, parameters for the plots)
|
||||
Do we want to wrap prevalences (currently simple np.ndarray) as a class? This might be convenient for some interfaces
|
||||
(e.g., for specifying artificial prevalences in samplings, for printing them -- currently supported through
|
||||
F.strprev(), etc.). This might however add some overhead, and prevent or complicate post-processing with numpy.
|
||||
Would be nice to get a better integration with sklearn.
|
||||
|
||||
Add the fix suggested by Alexander?
|
||||
"For a more general application, I would maybe first establish a per-class threshold value of plausible prevalence
|
||||
based on the number of actual positives and the required sample size; e.g., for sample_size=100 and actual
|
||||
positives [10, 100, 500] -> [0.1, 1.0, 1.0], meaning that class 0 can be sampled at most at 0.1 prevalence, while
|
||||
the others can be sampled up to 1. prevalence. Then, when a prevalence value is requested, e.g., [0.33, 0.33, 0.33],
|
||||
we may either clip each value and normalize (as you suggest for the extreme case, e.g., [0.1, 0.33, 0.33]/sum) or
|
||||
scale each value by per-class thresholds, i.e., [0.33*0.1, 0.33*1, 0.33*1]/sum."
|
||||
- This affects LabelledCollection
|
||||
- This functionality should be accessible via sampling protocols and evaluation functions
|
||||
|
||||
- [TODO] document confidence in manuals
|
||||
- [TODO] Test the return_type="index" in protocols and finish the "distributing_samples.py" example
|
||||
- [TODO] Add EDy (an implementation is available at quantificationlib)
|
||||
- [TODO] add ensemble methods SC-MQ, MC-SQ, MC-MQ
|
||||
- [TODO] add HistNetQ
|
||||
- [TODO] add CDE-iteration and Bayes-CDE methods
|
||||
- [TODO] add Friedman's method and DeBias
|
||||
- [TODO] check ignore warning stuff
|
||||
check https://docs.python.org/3/library/warnings.html#temporarily-suppressing-warnings
|
||||
- [TODO] nmd and md are not selectable from qp.evaluation.evaluate as a string
|
||||
|
|
@ -1 +0,0 @@
|
|||
build/
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
|
@ -1,124 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Overview: module code — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/sphinx_highlight.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item active">Overview: module code</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>All modules for which code is available</h1>
|
||||
<ul><li><a href="quapy/classification/calibration.html">quapy.classification.calibration</a></li>
|
||||
<li><a href="quapy/classification/methods.html">quapy.classification.methods</a></li>
|
||||
<li><a href="quapy/classification/neural.html">quapy.classification.neural</a></li>
|
||||
<li><a href="quapy/classification/svmperf.html">quapy.classification.svmperf</a></li>
|
||||
<li><a href="quapy/data/base.html">quapy.data.base</a></li>
|
||||
<li><a href="quapy/data/datasets.html">quapy.data.datasets</a></li>
|
||||
<li><a href="quapy/data/preprocessing.html">quapy.data.preprocessing</a></li>
|
||||
<li><a href="quapy/data/reader.html">quapy.data.reader</a></li>
|
||||
<li><a href="quapy/error.html">quapy.error</a></li>
|
||||
<li><a href="quapy/evaluation.html">quapy.evaluation</a></li>
|
||||
<li><a href="quapy/functional.html">quapy.functional</a></li>
|
||||
<li><a href="quapy/method/_kdey.html">quapy.method._kdey</a></li>
|
||||
<li><a href="quapy/method/_neural.html">quapy.method._neural</a></li>
|
||||
<li><a href="quapy/method/_threshold_optim.html">quapy.method._threshold_optim</a></li>
|
||||
<li><a href="quapy/method/aggregative.html">quapy.method.aggregative</a></li>
|
||||
<li><a href="quapy/method/base.html">quapy.method.base</a></li>
|
||||
<li><a href="quapy/method/meta.html">quapy.method.meta</a></li>
|
||||
<li><a href="quapy/method/non_aggregative.html">quapy.method.non_aggregative</a></li>
|
||||
<li><a href="quapy/model_selection.html">quapy.model_selection</a></li>
|
||||
<li><a href="quapy/plot.html">quapy.plot</a></li>
|
||||
<li><a href="quapy/protocol.html">quapy.protocol</a></li>
|
||||
<li><a href="quapy/util.html">quapy.util</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,319 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.classification.calibration — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.classification.calibration</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.classification.calibration</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">abstention.calibration</span> <span class="kn">import</span> <span class="n">NoBiasVectorScaling</span><span class="p">,</span> <span class="n">TempScaling</span><span class="p">,</span> <span class="n">VectorScaling</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">clone</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">cross_val_predict</span><span class="p">,</span> <span class="n">train_test_split</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
|
||||
<span class="c1"># Wrappers of calibration defined by Alexandari et al. in paper <http://proceedings.mlr.press/v119/alexandari20a.html></span>
|
||||
<span class="c1"># requires "pip install abstention"</span>
|
||||
<span class="c1"># see https://github.com/kundajelab/abstention</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifier"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifier">[docs]</a><span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifier</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract class for (re)calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari, A., Kundaje, A., & Shrikumar, A. (2020, November). Maximum likelihood with bias-corrected calibration</span>
|
||||
<span class="sd"> is hard-to-beat at label shift adaptation. In International Conference on Machine Learning (pp. 222-232). PMLR.</span>
|
||||
<span class="sd"> <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">pass</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase">[docs]</a><span class="k">class</span> <span class="nc">RecalibratedProbabilisticClassifierBase</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">RecalibratedProbabilisticClassifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies a (re)calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_.</span>
|
||||
|
||||
|
||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
||||
<span class="sd"> :param calibrator: the calibration object (an instance of abstention.calibration.CalibratorFactory)</span>
|
||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer); default=None</span>
|
||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">calibrator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">calibrator</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the calibration for the probabilistic classifier.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">k</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'wrong value for val_split: the number of folds must be > 2'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_cv</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">k</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="mi">0</span> <span class="o"><</span> <span class="n">k</span> <span class="o"><</span> <span class="mi">1</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'wrong value for val_split: the proportion of validation documents must be in (0,1)'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit_tr_val</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_cv"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_cv">[docs]</a> <span class="k">def</span> <span class="nf">fit_cv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the calibration in a cross-validation manner, i.e., it generates posterior probabilities for all</span>
|
||||
<span class="sd"> training instances via cross-validation, and then retrains the classifier on all training instances.</span>
|
||||
<span class="sd"> The posterior probabilities thus generated are used for calibrating the outputs of the classifier.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'predict_proba'</span>
|
||||
<span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">y</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.fit_tr_val"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.fit_tr_val">[docs]</a> <span class="k">def</span> <span class="nf">fit_tr_val</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the calibration in a train/val-split manner, i.e., it partitions the training instances into a</span>
|
||||
<span class="sd"> training and a validation set, and then uses the training samples to learn a classifier, which is then used</span>
|
||||
<span class="sd"> to generate posterior probabilities for the held-out validation data. These posteriors are used to calibrate</span>
|
||||
<span class="sd"> the classifier. The classifier is not retrained on the whole dataset.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xva</span><span class="p">,</span> <span class="n">ytr</span><span class="p">,</span> <span class="n">yva</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">test_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">val_split</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">ytr</span><span class="p">)</span>
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">Xva</span><span class="p">)</span>
|
||||
<span class="n">nclasses</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">yva</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">eye</span><span class="p">(</span><span class="n">nclasses</span><span class="p">)[</span><span class="n">yva</span><span class="p">],</span> <span class="n">posterior_supplied</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict">[docs]</a> <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts class labels for the data instances in `X`</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples,)` with the class label predictions</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="RecalibratedProbabilisticClassifierBase.predict_proba"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.RecalibratedProbabilisticClassifierBase.predict_proba">[docs]</a> <span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates posterior probabilities for the data instances in `X`</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the data instances</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with posterior probabilities</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">calibration_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the classes on which the classifier has been trained</span>
|
||||
|
||||
<span class="sd"> :return: array-like of shape `(n_classes)`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NBVSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.NBVSCalibration">[docs]</a><span class="k">class</span> <span class="nc">NBVSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies the No-Bias Vector Scaling (NBVS) calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
||||
|
||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">NoBiasVectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="BCTSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.BCTSCalibration">[docs]</a><span class="k">class</span> <span class="nc">BCTSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies the Bias-Corrected Temperature Scaling (BCTS) calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
||||
|
||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">,</span> <span class="n">bias_positions</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.TSCalibration">[docs]</a><span class="k">class</span> <span class="nc">TSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies the Temperature Scaling (TS) calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
||||
|
||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">TempScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="VSCalibration"><a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.calibration.VSCalibration">[docs]</a><span class="k">class</span> <span class="nc">VSCalibration</span><span class="p">(</span><span class="n">RecalibratedProbabilisticClassifierBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies the Vector Scaling (VS) calibration method from `abstention.calibration`, as defined in</span>
|
||||
<span class="sd"> `Alexandari et al. paper <http://proceedings.mlr.press/v119/alexandari20a.html>`_:</span>
|
||||
|
||||
<span class="sd"> :param classifier: a scikit-learn probabilistic classifier</span>
|
||||
<span class="sd"> :param val_split: indicate an integer k for performing kFCV to obtain the posterior probabilities, or a float p</span>
|
||||
<span class="sd"> in (0,1) to indicate that the posteriors are obtained in a stratified validation split containing p% of the</span>
|
||||
<span class="sd"> training instances (the rest is used for training). In any case, the classifier is retrained in the whole</span>
|
||||
<span class="sd"> training set afterwards. Default value is 5.</span>
|
||||
<span class="sd"> :param n_jobs: indicate the number of parallel workers (only when val_split is an integer)</span>
|
||||
<span class="sd"> :param verbose: whether or not to display information in the standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">calibrator</span> <span class="o">=</span> <span class="n">VectorScaling</span><span class="p">(</span><span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,220 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.classification.methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.classification.methods</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.classification.methods</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.decomposition</span> <span class="kn">import</span> <span class="n">TruncatedSVD</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">LowRankLogisticRegression</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An example of a classification method (i.e., an object that implements `fit`, `predict`, and `predict_proba`)</span>
|
||||
<span class="sd"> that also generates embedded inputs (i.e., that implements `transform`), as those required for</span>
|
||||
<span class="sd"> :class:`quapy.method.neural.QuaNet`. This is a mock method to allow for easily instantiating</span>
|
||||
<span class="sd"> :class:`quapy.method.neural.QuaNet` on array-like real-valued instances.</span>
|
||||
<span class="sd"> The transformation consists of applying :class:`sklearn.decomposition.TruncatedSVD`</span>
|
||||
<span class="sd"> while classification is performed using :class:`sklearn.linear_model.LogisticRegression` on the low-rank space.</span>
|
||||
|
||||
<span class="sd"> :param n_components: the number of principal components to retain</span>
|
||||
<span class="sd"> :param kwargs: parameters for the</span>
|
||||
<span class="sd"> `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__ classifier</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_components</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">n_components</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.get_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.get_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Get hyper-parameters for this estimator.</span>
|
||||
|
||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'n_components'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">}</span>
|
||||
<span class="n">params</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="n">params</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.set_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.set_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Set the parameters of this estimator.</span>
|
||||
|
||||
<span class="sd"> :param params: a `**kwargs` dictionary with the estimator parameters for</span>
|
||||
<span class="sd"> `Logistic Regression <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html>`__</span>
|
||||
<span class="sd"> and, optionally, `n_components` for `TruncatedSVD`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">params_</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="s1">'n_components'</span> <span class="ow">in</span> <span class="n">params_</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_components</span> <span class="o">=</span> <span class="n">params_</span><span class="p">[</span><span class="s1">'n_components'</span><span class="p">]</span>
|
||||
<span class="k">del</span> <span class="n">params_</span><span class="p">[</span><span class="s1">'n_components'</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params_</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.fit">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.fit">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fit the model according to the given training data. The fit consists of</span>
|
||||
<span class="sd"> fitting `TruncatedSVD` and then `LogisticRegression` on the low-rank representation.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` with the instances</span>
|
||||
<span class="sd"> :param y: array-like of shape `(n_samples,)` with the class labels</span>
|
||||
<span class="sd"> :return: `self`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">nF</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">nF</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="o">=</span> <span class="n">TruncatedSVD</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_components</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">classes_</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.predict">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts labels for the instances `X` embedded into the low-rank space.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
||||
<span class="sd"> instances in `X`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.predict_proba">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.predict_proba">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts posterior probabilities for the instances `X` embedded into the low-rank space.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LowRankLogisticRegression.transform">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.methods.LowRankLogisticRegression.transform">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the low-rank approximation of `X` with `n_components` dimensions, or `X` unaltered if</span>
|
||||
<span class="sd"> `n_components` >= `X.shape[1]`.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to embed</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, n_components)` with the embedded instances</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">X</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pca</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,715 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.classification.neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.classification.neural</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.classification.neural</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
|
||||
<span class="kn">import</span> <span class="nn">torch.nn.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">accuracy_score</span><span class="p">,</span> <span class="n">f1_score</span>
|
||||
<span class="kn">from</span> <span class="nn">torch.nn.utils.rnn</span> <span class="kn">import</span> <span class="n">pad_sequence</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">NeuralClassifierTrainer</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Trains a neural network for text classification.</span>
|
||||
|
||||
<span class="sd"> :param net: an instance of `TextClassifierNet` implementing the forward pass</span>
|
||||
<span class="sd"> :param lr: learning rate (default 1e-3)</span>
|
||||
<span class="sd"> :param weight_decay: weight decay (default 0)</span>
|
||||
<span class="sd"> :param patience: number of epochs that do not show any improvement in validation</span>
|
||||
<span class="sd"> to wait before applying early stop (default 10)</span>
|
||||
<span class="sd"> :param epochs: maximum number of training epochs (default 200)</span>
|
||||
<span class="sd"> :param batch_size: batch size for training (default 64)</span>
|
||||
<span class="sd"> :param batch_size_test: batch size for test (default 512)</span>
|
||||
<span class="sd"> :param padding_length: maximum number of tokens to consider in a document (default 300)</span>
|
||||
<span class="sd"> :param device: specify 'cuda' (default) for enabling gpu, or 'cpu'</span>
|
||||
<span class="sd"> :param checkpointpath: where to store the parameters of the best model found so far</span>
|
||||
<span class="sd"> according to the evaluation in the held-out validation split (default '../checkpoint/classifier_net.dat')</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">net</span><span class="p">:</span> <span class="s1">'TextClassifierNet'</span><span class="p">,</span>
|
||||
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
|
||||
<span class="n">weight_decay</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||||
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
||||
<span class="n">epochs</span><span class="o">=</span><span class="mi">200</span><span class="p">,</span>
|
||||
<span class="n">batch_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
||||
<span class="n">batch_size_test</span><span class="o">=</span><span class="mi">512</span><span class="p">,</span>
|
||||
<span class="n">padding_length</span><span class="o">=</span><span class="mi">300</span><span class="p">,</span>
|
||||
<span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">,</span>
|
||||
<span class="n">checkpointpath</span><span class="o">=</span><span class="s1">'../checkpoint/classifier_net.dat'</span><span class="p">):</span>
|
||||
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">net</span><span class="p">,</span> <span class="n">TextClassifierNet</span><span class="p">),</span> <span class="sa">f</span><span class="s1">'net is not an instance of </span><span class="si">{</span><span class="n">TextClassifierNet</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">vocabulary_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="s1">'lr'</span><span class="p">:</span> <span class="n">lr</span><span class="p">,</span>
|
||||
<span class="s1">'weight_decay'</span><span class="p">:</span> <span class="n">weight_decay</span><span class="p">,</span>
|
||||
<span class="s1">'patience'</span><span class="p">:</span> <span class="n">patience</span><span class="p">,</span>
|
||||
<span class="s1">'epochs'</span><span class="p">:</span> <span class="n">epochs</span><span class="p">,</span>
|
||||
<span class="s1">'batch_size'</span><span class="p">:</span> <span class="n">batch_size</span><span class="p">,</span>
|
||||
<span class="s1">'batch_size_test'</span><span class="p">:</span> <span class="n">batch_size_test</span><span class="p">,</span>
|
||||
<span class="s1">'padding_length'</span><span class="p">:</span> <span class="n">padding_length</span><span class="p">,</span>
|
||||
<span class="s1">'device'</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="p">}</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span> <span class="o">=</span> <span class="n">checkpointpath</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[NeuralNetwork running on </span><span class="si">{</span><span class="n">device</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">checkpointpath</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.reset_net_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.reset_net_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Reinitialize the network parameters</span>
|
||||
|
||||
<span class="sd"> :param vocab_size: the size of the vocabulary</span>
|
||||
<span class="sd"> :param n_classes: the number of target classes</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="p">(</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">xavier_uniform</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.get_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.get_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Get hyper-parameters for this estimator</span>
|
||||
|
||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">(),</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span><span class="p">}</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.set_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.set_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Set the parameters of this trainer and the learner it is training.</span>
|
||||
<span class="sd"> In this current version, parameter names for the trainer and learner should</span>
|
||||
<span class="sd"> be disjoint.</span>
|
||||
|
||||
<span class="sd"> :param params: a `**kwargs` dictionary with the parameters</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
||||
<span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the use of parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is ambiguous since it can refer to '</span>
|
||||
<span class="sa">f</span><span class="s1">'a parameters of the Trainer or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span> <span class="ow">and</span> <span class="n">key</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">learner_hyperparams</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'parameter </span><span class="si">{</span><span class="n">key</span><span class="si">}</span><span class="s1"> is not valid'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="n">trainer_hyperparams</span><span class="p">:</span>
|
||||
<span class="n">trainer_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">learner_hyperparams</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span> <span class="o">=</span> <span class="n">trainer_hyperparams</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">learner_hyperparams</span> <span class="o">=</span> <span class="n">learner_hyperparams</span> </div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">""" Gets the device in which the network is allocated</span>
|
||||
|
||||
<span class="sd"> :return: device</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">device</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_train_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">train</span><span class="p">()</span>
|
||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
|
||||
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
|
||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
|
||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
||||
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"loss"</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
||||
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
||||
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"acc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"f1"</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_test_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
||||
<span class="n">criterion</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">CrossEntropyLoss</span><span class="p">()</span>
|
||||
<span class="n">losses</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">true_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">xi</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="n">data</span><span class="p">:</span>
|
||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">criterion</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">yi</span><span class="p">)</span>
|
||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
||||
<span class="n">preds</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">predictions</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">preds</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
||||
<span class="n">true_labels</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">yi</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
|
||||
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"loss"</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"acc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">accuracy_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
||||
<span class="n">status</span><span class="p">[</span><span class="s2">"f1"</span><span class="p">]</span> <span class="o">=</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">true_labels</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">__update_progress_bar</span><span class="p">(</span><span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__update_progress_bar</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
||||
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">] training epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'tr-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'tr-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"acc"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
||||
<span class="sa">f</span><span class="s1">'tr-macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr"</span><span class="p">][</span><span class="s2">"f1"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
||||
<span class="sa">f</span><span class="s1">'patience=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'val-loss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'val-acc=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"acc"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">% '</span>
|
||||
<span class="sa">f</span><span class="s1">'macroF1=</span><span class="si">{</span><span class="mi">100</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va"</span><span class="p">][</span><span class="s2">"f1"</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">%'</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.fit">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.fit">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the model according to the given training data.</span>
|
||||
|
||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
||||
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
|
||||
<span class="sd"> :param val_split: proportion of training documents to be taken as the validation set (default 0.3)</span>
|
||||
<span class="sd"> :return:</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">val_split</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">classes_</span>
|
||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
||||
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpointpath</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reset_net_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocab_size</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="n">train_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">train</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
||||
<span class="n">valid_generator</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">val</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">val</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">])</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'tr'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'acc'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">},</span>
|
||||
<span class="s1">'va'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'acc'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">}}</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lr'</span><span class="p">],</span> <span class="n">weight_decay</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'weight_decay'</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'patience'</span><span class="p">],</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'epochs'</span><span class="p">]</span> <span class="o">+</span> <span class="mi">1</span><span class="p">))</span> <span class="k">as</span> <span class="n">pbar</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">epoch</span> <span class="ow">in</span> <span class="n">pbar</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">train_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_test_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va'</span><span class="p">][</span><span class="s1">'f1'</span><span class="p">],</span> <span class="n">epoch</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
||||
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'training ended by patience exhasted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'for epoch </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'performing one training pass over the validation set...'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_train_epoch</span><span class="p">(</span><span class="n">valid_generator</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr'</span><span class="p">],</span> <span class="n">pbar</span><span class="p">,</span> <span class="n">epoch</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[done]'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.predict">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts labels for the instances</span>
|
||||
|
||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
||||
<span class="sd"> instances in `X`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">),</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.predict_proba">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.predict_proba">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts posterior probabilities for the instances</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">]):</span>
|
||||
<span class="n">posteriors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">xi</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NeuralClassifierTrainer.transform">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.NeuralClassifierTrainer.transform">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the embeddings of the instances</span>
|
||||
|
||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, embed_size)` with the embedded instances,</span>
|
||||
<span class="sd"> where `embed_size` is defined by the classification network</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">trainer_hyperparams</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="k">for</span> <span class="n">xi</span> <span class="ow">in</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span><span class="o">.</span><span class="n">asDataloader</span><span class="p">(</span>
|
||||
<span class="n">opt</span><span class="p">[</span><span class="s1">'batch_size_test'</span><span class="p">],</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">pad_length</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'padding_length'</span><span class="p">],</span> <span class="n">device</span><span class="o">=</span><span class="n">opt</span><span class="p">[</span><span class="s1">'device'</span><span class="p">]):</span>
|
||||
<span class="n">embeddings</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">net</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">xi</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">embeddings</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TorchDataset">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">TorchDataset</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">Dataset</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd">    Transforms labelled instances into a Torch :class:`torch.utils.data.DataLoader` object</span>
|
||||
|
||||
<span class="sd"> :param instances: list of lists of indexed tokens</span>
|
||||
<span class="sd"> :param labels: array-like of shape `(n_samples, n_classes)` with the class labels</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">labels</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">{</span><span class="s1">'doc'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">],</span> <span class="s1">'label'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="kc">None</span><span class="p">}</span>
|
||||
|
||||
<div class="viewcode-block" id="TorchDataset.asDataloader">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TorchDataset.asDataloader">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">asDataloader</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="p">,</span> <span class="n">pad_length</span><span class="p">,</span> <span class="n">device</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Converts the labelled collection into a Torch DataLoader with dynamic padding for</span>
|
||||
<span class="sd"> the batch</span>
|
||||
|
||||
<span class="sd"> :param batch_size: batch size</span>
|
||||
<span class="sd"> :param shuffle: whether or not to shuffle instances</span>
|
||||
<span class="sd"> :param pad_length: the maximum length for the list of tokens (dynamic padding is</span>
|
||||
<span class="sd"> applied, meaning that if the longest document in the batch is shorter than</span>
|
||||
<span class="sd">            `pad_length`, then the batch is padded up to its length, and not to `pad_length`).</span>
|
||||
<span class="sd"> :param device: whether to allocate tensors in cpu or in cuda</span>
|
||||
<span class="sd"> :return: a :class:`torch.utils.data.DataLoader` object</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">collate</span><span class="p">(</span><span class="n">batch</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">LongTensor</span><span class="p">(</span><span class="n">item</span><span class="p">[</span><span class="s1">'doc'</span><span class="p">][:</span><span class="n">pad_length</span><span class="p">])</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pad_sequence</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">padding_value</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_INDEX'</span><span class="p">])</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="n">targets</span> <span class="o">=</span> <span class="p">[</span><span class="n">item</span><span class="p">[</span><span class="s1">'label'</span><span class="p">]</span> <span class="k">for</span> <span class="n">item</span> <span class="ow">in</span> <span class="n">batch</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">targets</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">data</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">targets</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">targets</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">long</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">[</span><span class="n">data</span><span class="p">,</span> <span class="n">targets</span><span class="p">]</span>
|
||||
|
||||
<span class="n">torchDataset</span> <span class="o">=</span> <span class="n">TorchDataset</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">DataLoader</span><span class="p">(</span><span class="n">torchDataset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="n">batch_size</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">collate_fn</span><span class="o">=</span><span class="n">collate</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">TextClassifierNet</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract Text classifier (`torch.nn.Module`)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.document_embedding">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.document_embedding">[docs]</a>
|
||||
<span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
||||
<span class="sd"> next-to-last layer).</span>
|
||||
|
||||
<span class="sd">        :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
||||
<span class="sd"> dimensionality of the embedding</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.forward">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.forward">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Performs the forward pass.</span>
|
||||
|
||||
<span class="sd">        :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
||||
<span class="sd"> :return: a tensor of shape `(n_instances, n_classes)` with the decision scores</span>
|
||||
<span class="sd"> for each of the instances and classes</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">doc_embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">document_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">doc_embedded</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.dimensions">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.dimensions">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">dimensions</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Gets the number of dimensions of the embedding space</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.predict_proba">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.predict_proba">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts posterior probabilities for the instances in `x`</span>
|
||||
|
||||
<span class="sd"> :param x: a torch tensor of indexed tokens with shape `(n_instances, pad_length)`</span>
|
||||
<span class="sd"> where `n_instances` is the number of instances in the batch, and `pad_length`</span>
|
||||
<span class="sd">            is the length of the pad in the batch</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples, n_classes)` with the posterior probabilities</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">detach</span><span class="p">()</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.xavier_uniform">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.xavier_uniform">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">xavier_uniform</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Performs Xavier initialization of the network parameters</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">():</span>
|
||||
<span class="k">if</span> <span class="n">p</span><span class="o">.</span><span class="n">dim</span><span class="p">()</span> <span class="o">></span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">p</span><span class="o">.</span><span class="n">requires_grad</span><span class="p">:</span>
|
||||
<span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">init</span><span class="o">.</span><span class="n">xavier_uniform_</span><span class="p">(</span><span class="n">p</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TextClassifierNet.get_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.TextClassifierNet.get_params">[docs]</a>
|
||||
<span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
||||
|
||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return the size of the vocabulary</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LSTMnet">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">LSTMnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
|
||||
<span class="sd"> Long Short Term Memory networks.</span>
|
||||
|
||||
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
|
||||
<span class="sd"> :param n_classes: number of target classes</span>
|
||||
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
|
||||
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
|
||||
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
|
||||
<span class="sd"> :param lstm_class_nlayers: number of LSTM layers (default 1)</span>
|
||||
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
|
||||
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="s1">'embedding_size'</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
|
||||
<span class="s1">'hidden_size'</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
|
||||
<span class="s1">'repr_size'</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
|
||||
<span class="s1">'lstm_class_nlayers'</span><span class="p">:</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span>
|
||||
<span class="s1">'drop_p'</span><span class="p">:</span> <span class="n">drop_p</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">embedding_size</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="n">lstm_class_nlayers</span><span class="p">,</span> <span class="n">dropout</span><span class="o">=</span><span class="n">drop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">set_size</span><span class="p">):</span>
|
||||
<span class="n">opt</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span>
|
||||
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lstm_class_nlayers'</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'hidden_size'</span><span class="p">])</span>
|
||||
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">opt</span><span class="p">[</span><span class="s1">'lstm_class_nlayers'</span><span class="p">],</span> <span class="n">set_size</span><span class="p">,</span> <span class="n">opt</span><span class="p">[</span><span class="s1">'hidden_size'</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
|
||||
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
|
||||
|
||||
<div class="viewcode-block" id="LSTMnet.document_embedding">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.document_embedding">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
||||
<span class="sd"> next-to-last layer).</span>
|
||||
|
||||
<span class="sd">        :param x: a batch of instances, typically generated by a torch `DataLoader`</span>
|
||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
||||
<span class="sd"> dimensionality of the embedding</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">embedded</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
|
||||
<span class="n">rnn_output</span><span class="p">,</span> <span class="n">rnn_hidden</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embedded</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">__init_hidden</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">0</span><span class="p">]))</span>
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span>
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">abstracted</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LSTMnet.get_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.LSTMnet.get_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
||||
|
||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return the size of the vocabulary</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="CNNnet">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">CNNnet</span><span class="p">(</span><span class="n">TextClassifierNet</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An implementation of :class:`quapy.classification.neural.TextClassifierNet` based on</span>
|
||||
<span class="sd"> Convolutional Neural Networks.</span>
|
||||
|
||||
<span class="sd"> :param vocabulary_size: the size of the vocabulary</span>
|
||||
<span class="sd"> :param n_classes: number of target classes</span>
|
||||
<span class="sd"> :param embedding_size: the dimensionality of the word embeddings space (default 100)</span>
|
||||
<span class="sd"> :param hidden_size: the dimensionality of the hidden space (default 256)</span>
|
||||
<span class="sd"> :param repr_size: the dimensionality of the document embeddings space (default 100)</span>
|
||||
<span class="sd"> :param kernel_heights: list of kernel lengths (default [3,5,7]), i.e., the number of</span>
|
||||
<span class="sd"> consecutive tokens that each kernel covers</span>
|
||||
<span class="sd"> :param stride: convolutional stride (default 1)</span>
|
||||
<span class="sd"> :param padding: convolutional padding (default 0)</span>
|
||||
<span class="sd"> :param drop_p: drop probability for dropout (default 0.5)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">embedding_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">hidden_size</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> <span class="n">repr_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
||||
<span class="n">kernel_heights</span><span class="o">=</span><span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span> <span class="n">stride</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">padding</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">drop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">CNNnet</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span> <span class="o">=</span> <span class="n">vocabulary_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span><span class="o">=</span><span class="p">{</span>
|
||||
<span class="s1">'embedding_size'</span><span class="p">:</span> <span class="n">embedding_size</span><span class="p">,</span>
|
||||
<span class="s1">'hidden_size'</span><span class="p">:</span> <span class="n">hidden_size</span><span class="p">,</span>
|
||||
<span class="s1">'repr_size'</span><span class="p">:</span> <span class="n">repr_size</span><span class="p">,</span>
|
||||
<span class="s1">'kernel_heights'</span><span class="p">:</span><span class="n">kernel_heights</span><span class="p">,</span>
|
||||
<span class="s1">'stride'</span><span class="p">:</span> <span class="n">stride</span><span class="p">,</span>
|
||||
<span class="s1">'drop_p'</span><span class="p">:</span> <span class="n">drop_p</span>
|
||||
<span class="p">}</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Embedding</span><span class="p">(</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">embedding_size</span><span class="p">)</span>
|
||||
<span class="n">in_channels</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv1</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv2</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">conv3</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Conv2d</span><span class="p">(</span><span class="n">in_channels</span><span class="p">,</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="p">(</span><span class="n">kernel_heights</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">embedding_size</span><span class="p">),</span> <span class="n">stride</span><span class="p">,</span> <span class="n">padding</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">drop_p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dim</span> <span class="o">=</span> <span class="n">repr_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">kernel_heights</span><span class="p">)</span> <span class="o">*</span> <span class="n">hidden_size</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dim</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__conv_block</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">,</span> <span class="n">conv_layer</span><span class="p">):</span>
|
||||
<span class="n">conv_out</span> <span class="o">=</span> <span class="n">conv_layer</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span> <span class="c1"># conv_out.size() = (batch_size, out_channels, dim, 1)</span>
|
||||
<span class="n">activation</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">conv_out</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span> <span class="c1"># activation.size() = (batch_size, out_channels, dim1)</span>
|
||||
<span class="n">max_out</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max_pool1d</span><span class="p">(</span><span class="n">activation</span><span class="p">,</span> <span class="n">activation</span><span class="o">.</span><span class="n">size</span><span class="p">()[</span><span class="mi">2</span><span class="p">])</span><span class="o">.</span><span class="n">squeeze</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># maxpool_out.size() = (batch_size, out_channels)</span>
|
||||
<span class="k">return</span> <span class="n">max_out</span>
|
||||
|
||||
<div class="viewcode-block" id="CNNnet.document_embedding">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.document_embedding">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">document_embedding</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">input</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Embeds documents (i.e., performs the forward pass up to the</span>
|
||||
<span class="sd"> next-to-last layer).</span>
|
||||
|
||||
<span class="sd"> :param input: a batch of instances, typically generated by a torch's `DataLoader`</span>
|
||||
<span class="sd"> instance (see :class:`quapy.classification.neural.TorchDataset`)</span>
|
||||
<span class="sd"> :return: a torch tensor of shape `(n_samples, n_dimensions)`, where</span>
|
||||
<span class="sd"> `n_samples` is the number of documents, and `n_dimensions` is the</span>
|
||||
<span class="sd"> dimensionality of the embedding</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="nb">input</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">word_embedding</span><span class="p">(</span><span class="nb">input</span><span class="p">)</span>
|
||||
<span class="nb">input</span> <span class="o">=</span> <span class="nb">input</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># input.size() = (batch_size, 1, num_seq, embedding_length)</span>
|
||||
|
||||
<span class="n">max_out1</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv1</span><span class="p">)</span>
|
||||
<span class="n">max_out2</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv2</span><span class="p">)</span>
|
||||
<span class="n">max_out3</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__conv_block</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">conv3</span><span class="p">)</span>
|
||||
|
||||
<span class="n">all_out</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">max_out1</span><span class="p">,</span> <span class="n">max_out2</span><span class="p">,</span> <span class="n">max_out3</span><span class="p">),</span> <span class="mi">1</span><span class="p">)</span> <span class="c1"># all_out.size() = (batch_size, num_kernels*out_channels)</span>
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">relu</span><span class="p">(</span><span class="n">all_out</span><span class="p">))</span> <span class="c1"># (batch_size, num_kernels*out_channels)</span>
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">doc_embedder</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">abstracted</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="CNNnet.get_params">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.neural.CNNnet.get_params">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Get hyper-parameters for this estimator</span>
|
||||
|
||||
<span class="sd"> :return: a dictionary with parameter names mapped to their values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">hyperparams</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return the size of the vocabulary</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size_</span></div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,268 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.classification.svmperf — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.classification.svmperf</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.classification.svmperf</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">random</span>
|
||||
<span class="kn">import</span> <span class="nn">shutil</span>
|
||||
<span class="kn">import</span> <span class="nn">subprocess</span>
|
||||
<span class="kn">import</span> <span class="nn">tempfile</span>
|
||||
<span class="kn">from</span> <span class="nn">os</span> <span class="kn">import</span> <span class="n">remove</span><span class="p">,</span> <span class="n">makedirs</span>
|
||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span><span class="p">,</span> <span class="n">exists</span>
|
||||
<span class="kn">from</span> <span class="nn">subprocess</span> <span class="kn">import</span> <span class="n">PIPE</span><span class="p">,</span> <span class="n">STDOUT</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">dump_svmlight_file</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="SVMperf">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">SVMperf</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">ClassifierMixin</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""A wrapper for the `SVM-perf package <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`__ by Thorsten Joachims.</span>
|
||||
<span class="sd"> When using losses for quantification, the source code has to be patched. See</span>
|
||||
<span class="sd"> the `installation documentation <https://hlt-isti.github.io/QuaPy/build/html/Installation.html#svm-perf-with-quantification-oriented-losses>`__</span>
|
||||
<span class="sd"> for further details.</span>
|
||||
|
||||
<span class="sd"> References:</span>
|
||||
|
||||
<span class="sd"> * `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406?casa_token=8D2fHsGCVn0AAAAA:ZfThYOvrzWxMGfZYlQW_y8Cagg-o_l6X_PcF09mdETQ4Tu7jK98mxFbGSXp9ZSO14JkUIYuDGFG0>`__</span>
|
||||
<span class="sd"> * `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/abs/pii/S003132031400291X>`__</span>
|
||||
|
||||
<span class="sd"> :param svmperf_base: path to directory containing the binary files `svm_perf_learn` and `svm_perf_classify`</span>
|
||||
<span class="sd"> :param C: trade-off between training error and margin (default 0.01)</span>
|
||||
<span class="sd"> :param verbose: set to True to print svm-perf std outputs</span>
|
||||
<span class="sd"> :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".</span>
|
||||
<span class="sd"> :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory</span>
|
||||
<span class="sd"> (temporary directories are automatically deleted)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="c1"># losses with their respective codes in svm_perf implementation</span>
|
||||
<span class="n">valid_losses</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'01'</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="s1">'f1'</span><span class="p">:</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'kld'</span><span class="p">:</span><span class="mi">12</span><span class="p">,</span> <span class="s1">'nkld'</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span> <span class="s1">'q'</span><span class="p">:</span><span class="mi">22</span><span class="p">,</span> <span class="s1">'qacc'</span><span class="p">:</span><span class="mi">23</span><span class="p">,</span> <span class="s1">'qf1'</span><span class="p">:</span><span class="mi">24</span><span class="p">,</span> <span class="s1">'qgm'</span><span class="p">:</span><span class="mi">25</span><span class="p">,</span> <span class="s1">'mae'</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span> <span class="s1">'mrae'</span><span class="p">:</span><span class="mi">27</span><span class="p">}</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">svmperf_base</span><span class="p">,</span> <span class="n">C</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">loss</span><span class="o">=</span><span class="s1">'01'</span><span class="p">,</span> <span class="n">host_folder</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="n">svmperf_base</span><span class="p">),</span> <span class="sa">f</span><span class="s1">'path </span><span class="si">{</span><span class="n">svmperf_base</span><span class="si">}</span><span class="s1"> does not seem to point to a valid path'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span> <span class="o">=</span> <span class="n">svmperf_base</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">C</span> <span class="o">=</span> <span class="n">C</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="o">=</span> <span class="n">loss</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="o">=</span> <span class="n">host_folder</span>
|
||||
|
||||
<span class="c1"># def set_params(self, **parameters):</span>
|
||||
<span class="c1"># """</span>
|
||||
<span class="c1"># Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># :param parameters: a `**kwargs` dictionary `{'C': <float>}`</span>
|
||||
<span class="c1"># """</span>
|
||||
<span class="c1"># assert sorted(list(parameters.keys())) == ['C', 'loss'], \</span>
|
||||
<span class="c1"># 'currently, only the C and loss parameters are supported'</span>
|
||||
<span class="c1"># self.C = parameters.get('C', self.C)</span>
|
||||
<span class="c1"># self.loss = parameters.get('loss', self.loss)</span>
|
||||
<span class="c1">#</span>
|
||||
<span class="c1"># def get_params(self, deep=True):</span>
|
||||
<span class="c1"># return {'C': self.C, 'loss': self.loss}</span>
|
||||
|
||||
<div class="viewcode-block" id="SVMperf.fit">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.fit">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Trains the SVM for the multivariate performance loss</span>
|
||||
|
||||
<span class="sd"> :param X: training instances</span>
|
||||
<span class="sd"> :param y: a binary vector of labels</span>
|
||||
<span class="sd"> :return: `self`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss</span> <span class="ow">in</span> <span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'unsupported loss </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="si">}</span><span class="s1">, valid ones are </span><span class="si">{</span><span class="nb">list</span><span class="p">(</span><span class="n">SVMperf</span><span class="o">.</span><span class="n">valid_losses</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="si">}</span><span class="s1">'</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">'svm_perf_learn'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_base</span><span class="p">,</span> <span class="s1">'svm_perf_classify'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span> <span class="o">=</span> <span class="s1">'-w 3 -l '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">valid_losses</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">loss</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span> <span class="o">=</span> <span class="s1">'-c '</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">C</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes_</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
|
||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
||||
<span class="c1"># this would allow to run parallel instances of predict</span>
|
||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'svmperfprocess'</span><span class="o">+</span><span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="c1"># tmp dir are removed after the fit terminates in multiprocessing...</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">TemporaryDirectory</span><span class="p">(</span><span class="n">suffix</span><span class="o">=</span><span class="n">random_code</span><span class="p">)</span><span class="o">.</span><span class="n">name</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">host_folder</span><span class="p">,</span> <span class="s1">'.'</span> <span class="o">+</span> <span class="n">random_code</span><span class="p">)</span>
|
||||
<span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'model-'</span><span class="o">+</span><span class="n">random_code</span><span class="p">)</span>
|
||||
<span class="n">traindat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'train-</span><span class="si">{</span><span class="n">random_code</span><span class="si">}</span><span class="s1">.dat'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_learn</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">c_cmd</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">loss_cmd</span><span class="p">,</span> <span class="n">traindat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Running]'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stderr</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
||||
<span class="n">remove</span><span class="p">(</span><span class="n">traindat</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="SVMperf.predict">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.predict">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Predicts labels for the instances `X`</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` instances to classify</span>
|
||||
<span class="sd"> :return: a `numpy` array of length `n` containing the label predictions, where `n` is the number of</span>
|
||||
<span class="sd"> instances in `X`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">confidence_scores</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">decision_function</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="p">(</span><span class="n">confidence_scores</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span> <span class="o">*</span> <span class="mi">1</span>
|
||||
<span class="k">return</span> <span class="n">predictions</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="SVMperf.decision_function">
|
||||
<a class="viewcode-back" href="../../../quapy.classification.html#quapy.classification.svmperf.SVMperf.decision_function">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">decision_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Evaluate the decision function for the samples in `X`.</span>
|
||||
|
||||
<span class="sd"> :param X: array-like of shape `(n_samples, n_features)` containing the instances to classify</span>
|
||||
<span class="sd"> :param y: unused</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_samples,)` containing the decision scores of the instances</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'tmpdir'</span><span class="p">),</span> <span class="s1">'predict called before fit'</span>
|
||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'model directory corrupted'</span>
|
||||
<span class="k">assert</span> <span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">),</span> <span class="s1">'model not found'</span>
|
||||
<span class="k">if</span> <span class="n">y</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
|
||||
<span class="c1"># in order to allow for parallel runs of predict, a random code is assigned</span>
|
||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
||||
<span class="n">predictions_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'predictions'</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">'.dat'</span><span class="p">)</span>
|
||||
<span class="n">testdat</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="s1">'test'</span> <span class="o">+</span> <span class="n">random_code</span> <span class="o">+</span> <span class="s1">'.dat'</span><span class="p">)</span>
|
||||
<span class="n">dump_svmlight_file</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="n">zero_based</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">cmd</span> <span class="o">=</span> <span class="s1">' '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">svmperf_classify</span><span class="p">,</span> <span class="n">testdat</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions_path</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Running]'</span><span class="p">,</span> <span class="n">cmd</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">cmd</span><span class="o">.</span><span class="n">split</span><span class="p">(),</span> <span class="n">stdout</span><span class="o">=</span><span class="n">PIPE</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">STDOUT</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">stdout</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s1">'utf-8'</span><span class="p">))</span>
|
||||
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">loadtxt</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
|
||||
<span class="n">remove</span><span class="p">(</span><span class="n">testdat</span><span class="p">)</span>
|
||||
<span class="n">remove</span><span class="p">(</span><span class="n">predictions_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">scores</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="fm">__del__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'tmpdir'</span><span class="p">):</span>
|
||||
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">tmpdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,165 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data._ifcb — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data._ifcb</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data._ifcb</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
|
||||
|
||||
<div class="viewcode-block" id="IFCBTrainSamplesFromDir">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">classes</span><span class="p">:</span> <span class="nb">list</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes</span> <span class="o">=</span> <span class="n">classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">filename</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">listdir</span><span class="p">(</span><span class="n">path_dir</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">filename</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s1">'.csv'</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">filename</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">sample</span><span class="p">))</span>
|
||||
<span class="c1"># all columns but the first where we get the class</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
||||
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span>
|
||||
|
||||
<div class="viewcode-block" id="IFCBTrainSamplesFromDir.total">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTrainSamplesFromDir.total">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
|
||||
|
||||
<span class="sd"> :return: The number of training samples to generate.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IFCBTestSamples">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">IFCBTestSamples</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path_dir</span><span class="p">,</span> <span class="n">test_prevalences_path</span><span class="p">))</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">_</span><span class="p">,</span> <span class="n">test_sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
||||
<span class="c1">#Load the sample from disk</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span><span class="n">test_sample</span><span class="p">[</span><span class="s1">'sample'</span><span class="p">]</span><span class="o">+</span><span class="s1">'.csv'</span><span class="p">))</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">test_sample</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="k">yield</span> <span class="n">X</span><span class="p">,</span> <span class="n">prevalences</span>
|
||||
|
||||
<div class="viewcode-block" id="IFCBTestSamples.total">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._ifcb.IFCBTestSamples.total">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the total number of samples that the protocol generates.</span>
|
||||
|
||||
<span class="sd"> :return: The number of test samples to generate.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">test_prevalences</span><span class="o">.</span><span class="n">index</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,307 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data._lequa2022 — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data._lequa2022</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data._lequa2022</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Union</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span>
|
||||
|
||||
<span class="n">DEV_SAMPLES</span> <span class="o">=</span> <span class="mi">1000</span>
|
||||
<span class="n">TEST_SAMPLES</span> <span class="o">=</span> <span class="mi">5000</span>
|
||||
|
||||
<span class="n">ERROR_TOL</span> <span class="o">=</span> <span class="mf">1E-3</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="load_category_map">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_category_map">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">load_category_map</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="n">cat2code</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fin</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">fin</span><span class="p">:</span>
|
||||
<span class="n">category</span><span class="p">,</span> <span class="n">code</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
||||
<span class="n">cat2code</span><span class="p">[</span><span class="n">category</span><span class="p">]</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">code</span><span class="p">)</span>
|
||||
<span class="n">code2cat</span> <span class="o">=</span> <span class="p">[</span><span class="n">cat</span> <span class="k">for</span> <span class="n">cat</span><span class="p">,</span> <span class="n">code</span> <span class="ow">in</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">cat2code</span><span class="o">.</span><span class="n">items</span><span class="p">(),</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="mi">1</span><span class="p">])]</span>
|
||||
<span class="k">return</span> <span class="n">cat2code</span><span class="p">,</span> <span class="n">code2cat</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="load_raw_documents">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_raw_documents">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">load_raw_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
||||
<span class="n">documents</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s2">"text"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
||||
<span class="n">labels</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="s2">"label"</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
||||
<span class="n">labels</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s2">"label"</span><span class="p">]</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">documents</span><span class="p">,</span> <span class="n">labels</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="load_vector_documents">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.load_vector_documents">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">load_vector_documents</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="n">D</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="n">labelled</span> <span class="o">=</span> <span class="n">D</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">301</span>
|
||||
<span class="k">if</span> <span class="n">labelled</span><span class="p">:</span>
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">D</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">D</span><span class="p">,</span> <span class="kc">None</span>
|
||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="SamplesFromDir">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.SamplesFromDir">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">SamplesFromDir</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path_dir</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">ground_truth_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">load_fn</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span> <span class="o">=</span> <span class="n">path_dir</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_fn</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">ground_truth_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="nb">id</span><span class="p">,</span> <span class="n">prevalence</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">true_prevs</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
||||
<span class="n">sample</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">load_fn</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">path_dir</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">.txt'</span><span class="p">))</span>
|
||||
<span class="k">yield</span> <span class="n">sample</span><span class="p">,</span> <span class="n">prevalence</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">ResultSubmission</span><span class="p">:</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__init_df</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">categories</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">categories</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">or</span> <span class="n">categories</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s1">'wrong format for categories: an int (>=2) was expected'</span><span class="p">)</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">categories</span><span class="p">)))</span>
|
||||
<span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">set_names</span><span class="p">(</span><span class="s1">'id'</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">n_categories</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.add">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.add">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">prevalence_values</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_id</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: expected int for sample_sample, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">sample_id</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: expected np.ndarray for prevalence_values, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">__init_df</span><span class="p">(</span><span class="n">categories</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">prevalence_values</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">sample_id</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">" already added'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span> <span class="ow">and</span> <span class="n">prevalence_values</span><span class="o">.</span><span class="n">size</span> <span class="o">!=</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_categories</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: wrong shape found for prevalence vector </span><span class="si">{</span><span class="n">prevalence_values</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">prevalence_values</span> <span class="o">></span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values out of range [0,1] for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">"'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevalence_values</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">></span> <span class="n">ERROR_TOL</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error: prevalence values do not sum up to one for "</span><span class="si">{</span><span class="n">sample_id</span><span class="si">}</span><span class="s1">"'</span>
|
||||
<span class="sa">f</span><span class="s1">'(error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span> <span class="o">=</span> <span class="n">prevalence_values</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.load">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.load">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s1">'ResultSubmission'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_file_format</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="n">ResultSubmission</span><span class="p">()</span>
|
||||
<span class="n">r</span><span class="o">.</span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span>
|
||||
<span class="k">return</span> <span class="n">r</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.dump">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.dump">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">dump</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.prevalence">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.prevalence">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="n">sel</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">sample_id</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">sel</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">sel</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.iterrows">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.iterrows">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">iterrows</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">row</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">df</span><span class="o">.</span><span class="n">iterrows</span><span class="p">():</span>
|
||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">row</span><span class="o">.</span><span class="n">values</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="k">yield</span> <span class="n">index</span><span class="p">,</span> <span class="n">prevalence</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.check_file_format">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_file_format">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">check_file_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">index_col</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the file </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1"> does not seem to be a valid csv file. '</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">e</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">ResultSubmission</span><span class="o">.</span><span class="n">check_dataframe_format</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ResultSubmission.check_dataframe_format">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data._lequa2022.ResultSubmission.check_dataframe_format">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">check_dataframe_format</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">df</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="n">Union</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">,</span> <span class="nb">str</span><span class="p">]]:</span>
|
||||
<span class="n">hint_path</span> <span class="o">=</span> <span class="s1">''</span> <span class="c1"># if given, show the data path in the error message</span>
|
||||
<span class="k">if</span> <span class="n">path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">hint_path</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">' in </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span> <span class="o">!=</span> <span class="s1">'id'</span> <span class="ow">or</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">)</span> <span class="o"><</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, '</span>
|
||||
<span class="sa">f</span><span class="s1">'the format of the header should be "id,0,...,n-1", '</span>
|
||||
<span class="sa">f</span><span class="s1">'where n is the number of categories'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="p">[</span><span class="nb">int</span><span class="p">(</span><span class="n">ci</span><span class="p">)</span> <span class="k">for</span> <span class="n">ci</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="o">.</span><span class="n">values</span><span class="p">]</span> <span class="o">!=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">))):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong header</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">, category ids should be 0,1,2,...,n-1, '</span>
|
||||
<span class="sa">f</span><span class="s1">'where n is the number of categories'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">df</span><span class="o">.</span><span class="n">empty</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: results file is empty'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">DEV_SAMPLES</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> <span class="o">!=</span> <span class="n">TEST_SAMPLES</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'wrong number of prevalence values found</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">; '</span>
|
||||
<span class="sa">f</span><span class="s1">'expected </span><span class="si">{</span><span class="n">DEV_SAMPLES</span><span class="si">}</span><span class="s1"> for development sets and '</span>
|
||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">TEST_SAMPLES</span><span class="si">}</span><span class="s1"> for test sets; found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">values</span><span class="p">)</span>
|
||||
<span class="n">expected_ids</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">df</span><span class="p">)))</span>
|
||||
<span class="k">if</span> <span class="n">ids</span> <span class="o">!=</span> <span class="n">expected_ids</span><span class="p">:</span>
|
||||
<span class="n">missing</span> <span class="o">=</span> <span class="n">expected_ids</span> <span class="o">-</span> <span class="n">ids</span>
|
||||
<span class="k">if</span> <span class="n">missing</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> missing ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="n">unexpected</span> <span class="o">=</span> <span class="n">ids</span> <span class="o">-</span> <span class="n">expected_ids</span>
|
||||
<span class="k">if</span> <span class="n">unexpected</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'there are </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">missing</span><span class="p">)</span><span class="si">}</span><span class="s1"> unexpected ids</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1">: </span><span class="si">{</span><span class="nb">sorted</span><span class="p">(</span><span class="n">unexpected</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">category_id</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o"><</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span> <span class="ow">or</span> <span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="n">category_id</span><span class="p">]</span> <span class="o">></span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error</span><span class="si">{</span><span class="n">hint_path</span><span class="si">}</span><span class="s1"> column "</span><span class="si">{</span><span class="n">category_id</span><span class="si">}</span><span class="s1">" contains values out of range [0,1]'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">round_errors</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span><span class="p">)</span> <span class="o">></span> <span class="n">ERROR_TOL</span>
|
||||
<span class="k">if</span> <span class="n">round_errors</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'warning: prevalence values in rows with id </span><span class="si">{</span><span class="n">np</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">round_errors</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'do not sum up to 1 (error tolerance </span><span class="si">{</span><span class="n">ERROR_TOL</span><span class="si">}</span><span class="s1">), '</span>
|
||||
<span class="sa">f</span><span class="s1">'probably due to some rounding errors.'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">df</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,728 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data.base</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data.base</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">cached_property</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Iterable</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">issparse</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">vstack</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">train_test_split</span><span class="p">,</span> <span class="n">RepeatedStratifiedKFold</span>
|
||||
<span class="kn">from</span> <span class="nn">numpy.random</span> <span class="kn">import</span> <span class="n">RandomState</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">temp_seed</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">LabelledCollection</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A LabelledCollection is a set of objects each with a label attached to each of them. </span>
|
||||
<span class="sd"> This class implements several sampling routines and other utilities.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> :param instances: array-like (np.ndarray, list, or csr_matrix are supported)</span>
|
||||
<span class="sd"> :param labels: array-like with the same length of instances</span>
|
||||
<span class="sd"> :param classes: optional, list of classes from which labels are taken. If not specified, the classes are inferred</span>
|
||||
<span class="sd"> from the labels. The classes must be indicated in cases in which some of the labels might have no examples</span>
|
||||
<span class="sd"> (i.e., a prevalence of 0)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">instances</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">instances</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="c1"># lists of strings occupy too much as ndarrays (although python-objects add a heavy overload)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span>
|
||||
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">classes</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">classes</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">difference</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)))</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'labels (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span><span class="si">}</span><span class="s1">) contain values not included in classes_ (</span><span class="si">{</span><span class="nb">set</span><span class="p">(</span><span class="n">classes</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n_docs</span><span class="p">)[</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.load">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.load">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a labelled set of data and converts it into a :class:`LabelledCollection` instance. The function in charge</span>
|
||||
<span class="sd"> of reading the instances must be specified. This function can be a custom one, or any of the reading functions</span>
|
||||
<span class="sd"> defined in :mod:`quapy.data.reader` module.</span>
|
||||
|
||||
<span class="sd"> :param path: string, the path to the file containing the labelled instances</span>
|
||||
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
|
||||
<span class="sd"> labels</span>
|
||||
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
|
||||
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances, i.e.,</span>
|
||||
<span class="sd"> these arguments are used to call `loader_func(path, **loader_kwargs)`</span>
|
||||
<span class="sd"> :return: a :class:`LabelledCollection` object</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="o">*</span><span class="n">loader_func</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">),</span> <span class="n">classes</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="fm">__len__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the length of this collection (number of labelled instances)</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.prevalence">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.prevalence">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">prevalence</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the prevalence, or relative frequency, of the classes in the codeframe.</span>
|
||||
|
||||
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the relative frequencies of each class, in the same order</span>
|
||||
<span class="sd"> as listed by `self.classes_`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts</span><span class="p">()</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.counts">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.counts">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">counts</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of instances for each of the classes in the codeframe.</span>
|
||||
|
||||
<span class="sd"> :return: a np.ndarray of shape `(n_classes)` with the number of instances of each class, in the same order</span>
|
||||
<span class="sd"> as listed by `self.classes_`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">])</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The number of classes</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns True if the number of classes is 2</span>
|
||||
|
||||
<span class="sd"> :return: boolean</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span>
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.sampling_index">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_index">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the</span>
|
||||
<span class="sd"> prevalence values are not specified, then returns the index of a uniform sampling.</span>
|
||||
<span class="sd"> For each class, the sampling is drawn with replacement if the requested prevalence is larger than</span>
|
||||
<span class="sd"> the actual prevalence of the class, or without replacement otherwise.</span>
|
||||
|
||||
<span class="sd"> :param size: integer, the requested size</span>
|
||||
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since</span>
|
||||
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
|
||||
<span class="sd"> `self.classes_`) can be specified, while the other class takes prevalence value `1-p`</span>
|
||||
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
|
||||
<span class="sd"> :param random_state: seed for reproducing sampling</span>
|
||||
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> <span class="c1"># no prevalence was indicated; returns an index for uniform sampling</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">),)</span>
|
||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="s1">'unexpected number of prevalences'</span>
|
||||
<span class="k">assert</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'prevalences (</span><span class="si">{</span><span class="n">prevs</span><span class="si">}</span><span class="s1">) wrong range (sum=</span><span class="si">{</span><span class="nb">sum</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
||||
|
||||
<span class="c1"># Decide how many instances should be taken for each class in order to satisfy the requested prevalence</span>
|
||||
<span class="c1"># accurately, and the number of instances in the sample (exactly). If int(size * prevs[i]) (which is</span>
|
||||
<span class="c1"># <= size * prevs[i]) examples are drawn from class i, there could be a remainder number of instances to take</span>
|
||||
<span class="c1"># to satisfy the size constraint. The remainder is distributed among the classes with probability = prevs.</span>
|
||||
<span class="c1"># (This aims at avoiding the remainder to be placed in a class for which the prevalence requested is 0.)</span>
|
||||
<span class="n">n_requests</span> <span class="o">=</span> <span class="p">{</span><span class="n">class_</span><span class="p">:</span> <span class="nb">round</span><span class="p">(</span><span class="n">size</span> <span class="o">*</span> <span class="n">prevs</span><span class="p">[</span><span class="n">i</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">class_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)}</span>
|
||||
<span class="n">remainder</span> <span class="o">=</span> <span class="n">size</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">n_requests</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
<span class="k">with</span> <span class="n">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="c1"># due to rounding, the remainder can be 0, >0, or <0</span>
|
||||
<span class="k">if</span> <span class="n">remainder</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="c1"># when the remainder is >0 we randomly add 1 to the requests for each class;</span>
|
||||
<span class="c1"># more prevalent classes are more likely to be taken in order to minimize the impact in the final prevalence</span>
|
||||
<span class="k">for</span> <span class="n">rand_class</span> <span class="ow">in</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">remainder</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">):</span>
|
||||
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">elif</span> <span class="n">remainder</span> <span class="o"><</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="c1"># when the remainder is <0 we randomly remove 1 from the requests, unless the request is 0 for a chosen</span>
|
||||
<span class="c1"># class; we repeat until remainder==0</span>
|
||||
<span class="k">while</span> <span class="n">remainder</span><span class="o">!=</span><span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">rand_class</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span> <span class="n">p</span><span class="o">=</span><span class="n">prevs</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">n_requests</span><span class="p">[</span><span class="n">rand_class</span><span class="p">]</span> <span class="o">-=</span> <span class="mi">1</span>
|
||||
<span class="n">remainder</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
|
||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">class_</span><span class="p">,</span> <span class="n">n_requested</span> <span class="ow">in</span> <span class="n">n_requests</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="n">n_candidates</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">])</span>
|
||||
<span class="n">index_sample</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="n">class_</span><span class="p">][</span>
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">n_candidates</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="n">n_requested</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="p">(</span><span class="n">n_requested</span> <span class="o">></span> <span class="n">n_candidates</span><span class="p">))</span>
|
||||
<span class="p">]</span> <span class="k">if</span> <span class="n">n_requested</span> <span class="o">></span> <span class="mi">0</span> <span class="k">else</span> <span class="p">[]</span>
|
||||
|
||||
<span class="n">indexes_sample</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_sample</span><span class="p">)</span>
|
||||
|
||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">shuffle</span><span class="p">:</span>
|
||||
<span class="n">indexes_sample</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="n">indexes_sample</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">indexes_sample</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.uniform_sampling_index">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling_index">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn</span>
|
||||
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
|
||||
<span class="sd"> otherwise.</span>
|
||||
|
||||
<span class="sd"> :param size: integer, the size of the uniform sample</span>
|
||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the sampling.</span>
|
||||
<span class="sd"> :return: a np.ndarray of shape `(size)` with the indexes</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">ng</span> <span class="o">=</span> <span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">ng</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span>
|
||||
<span class="k">return</span> <span class="n">ng</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> <span class="n">size</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="n">size</span> <span class="o">></span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.sampling">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence</span>
|
||||
<span class="sd"> values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than</span>
|
||||
<span class="sd"> the actual prevalence of the class, or with replacement otherwise.</span>
|
||||
|
||||
<span class="sd"> :param size: integer, the requested size</span>
|
||||
<span class="sd"> :param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since</span>
|
||||
<span class="sd"> it is constrained. E.g., for binary collections, only the prevalence `p` for the first class (as listed in</span>
|
||||
<span class="sd"> `self.classes_`) can be specified, while the other class takes prevalence value `1-p`</span>
|
||||
<span class="sd"> :param shuffle: if set to True (default), shuffles the index before returning it</span>
|
||||
<span class="sd"> :param random_state: seed for reproducing sampling</span>
|
||||
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size` and prevalence close to `prevs` (or</span>
|
||||
<span class="sd"> prevalence == `prevs` if the exact prevalence values can be met as proportions of instances)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">prev_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="n">shuffle</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">prev_index</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.uniform_sampling">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.uniform_sampling">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">uniform_sampling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn</span>
|
||||
<span class="sd"> with replacement if the requested size is greater than the number of instances, or without replacement</span>
|
||||
<span class="sd"> otherwise.</span>
|
||||
|
||||
<span class="sd"> :param size: integer, the requested size</span>
|
||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
||||
<span class="sd"> :return: an instance of :class:`LabelledCollection` with length == `size`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">unif_index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">unif_index</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.sampling_from_index">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.sampling_from_index">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">sampling_from_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns an instance of :class:`LabelledCollection` whose elements are sampled from this collection using the</span>
|
||||
<span class="sd"> index.</span>
|
||||
|
||||
<span class="sd"> :param index: np.ndarray</span>
|
||||
<span class="sd"> :return: an instance of :class:`LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">documents</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||||
<span class="n">labels</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.split_stratified">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_stratified">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">split_stratified</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns two instances of :class:`LabelledCollection` split with stratification from this collection, at desired</span>
|
||||
<span class="sd"> proportion.</span>
|
||||
|
||||
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
|
||||
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
|
||||
<span class="sd"> (typically used as a test collection).</span>
|
||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
||||
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
|
||||
<span class="sd"> second one with `1-train_prop` elements</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">tr_docs</span><span class="p">,</span> <span class="n">te_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">te_labels</span> <span class="o">=</span> <span class="n">train_test_split</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="n">train_prop</span><span class="p">,</span> <span class="n">stratify</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">tr_docs</span><span class="p">,</span> <span class="n">tr_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">te_docs</span><span class="p">,</span> <span class="n">te_labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.split_random">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.split_random">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">split_random</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">train_prop</span><span class="o">=</span><span class="mf">0.6</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns two instances of :class:`LabelledCollection` split randomly from this collection, at desired</span>
|
||||
<span class="sd"> proportion.</span>
|
||||
|
||||
<span class="sd"> :param train_prop: the proportion of elements to include in the left-most returned collection (typically used</span>
|
||||
<span class="sd"> as the training collection). The rest of elements are included in the right-most returned collection</span>
|
||||
<span class="sd"> (typically used as a test collection).</span>
|
||||
<span class="sd"> :param random_state: if specified, guarantees reproducibility of the split.</span>
|
||||
<span class="sd"> :return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the</span>
|
||||
<span class="sd"> second one with `1-train_prop` elements</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span><span class="o">.</span><span class="n">permutation</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="n">train_prop</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span> \
|
||||
<span class="s1">'argument train_prop cannot be greater than the number of elements in the collection'</span>
|
||||
<span class="n">splitpoint</span> <span class="o">=</span> <span class="n">train_prop</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">train_prop</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">train_prop</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> \
|
||||
<span class="s1">'argument train_prop out of range (0,1)'</span>
|
||||
<span class="n">splitpoint</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span><span class="o">*</span><span class="n">train_prop</span><span class="p">))</span>
|
||||
<span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">indexes</span><span class="p">[:</span><span class="n">splitpoint</span><span class="p">],</span> <span class="n">indexes</span><span class="p">[</span><span class="n">splitpoint</span><span class="p">:]</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">left</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">right</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">training</span><span class="p">,</span> <span class="n">test</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="fm">__add__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of this collection with another collection.</span>
|
||||
<span class="sd"> Both labelled collections must have the same classes.</span>
|
||||
|
||||
<span class="sd"> :param other: another :class:`LabelledCollection`</span>
|
||||
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of both collections</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">==</span><span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="p">)):</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unsupported operation for collections on different classes; '</span>
|
||||
<span class="sa">f</span><span class="s1">'expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">other</span><span class="o">.</span><span class="n">classes_</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.join">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.join">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">join</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="s1">'LabelledCollection'</span><span class="p">]):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a new :class:`LabelledCollection` as the union of the collections given in input.</span>
|
||||
|
||||
<span class="sd"> :param args: instances of :class:`LabelledCollection`</span>
|
||||
<span class="sd"> :return: a :class:`LabelledCollection` representing the union of all the given collections</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span> <span class="k">if</span> <span class="n">lc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">]</span>
|
||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'empty list is not allowed for mix'</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">lc</span><span class="p">,</span> <span class="n">LabelledCollection</span><span class="p">)</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">]),</span> \
|
||||
<span class="s1">'only instances of LabelledCollection allowed'</span>
|
||||
|
||||
<span class="n">first_instances</span> <span class="o">=</span> <span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">instances</span>
|
||||
<span class="n">first_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="nb">type</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span><span class="o">==</span><span class="n">first_type</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
||||
<span class="s1">'not all the collections are of instances of the same type'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="n">first_ndim</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">ndim</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="n">first_ndim</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
||||
<span class="s1">'not all the ndarrays are of the same dimension'</span>
|
||||
<span class="k">if</span> <span class="n">first_ndim</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">first_shape</span> <span class="o">=</span> <span class="n">first_instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="o">==</span> <span class="n">first_shape</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]),</span> \
|
||||
<span class="s1">'not all the ndarrays are of the same shape'</span>
|
||||
<span class="k">if</span> <span class="n">issparse</span><span class="p">(</span><span class="n">first_instances</span><span class="p">):</span>
|
||||
<span class="n">instances</span> <span class="o">=</span> <span class="n">vstack</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">instances</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first_instances</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
||||
<span class="n">instances</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="n">lc</span><span class="o">.</span><span class="n">instances</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">'unsupported operation for collection types'</span><span class="p">)</span>
|
||||
<span class="n">labels</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">lc</span><span class="o">.</span><span class="n">labels</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">args</span><span class="p">])</span>
|
||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">)</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">Xy</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Gets the instances and labels. This is useful when working with `sklearn` estimators, e.g.:</span>
|
||||
|
||||
<span class="sd"> >>> svm = LinearSVC().fit(*my_collection.Xy)</span>
|
||||
|
||||
<span class="sd"> :return: a tuple `(instances, labels)` from this collection</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">Xp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Gets the instances and the true prevalence. This is useful when implementing evaluation protocols from</span>
|
||||
<span class="sd"> a :class:`LabelledCollection` object.</span>
|
||||
|
||||
<span class="sd"> :return: a tuple `(instances, prevalence)` from this collection</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">X</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An alias to self.instances</span>
|
||||
|
||||
<span class="sd"> :return: self.instances</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">y</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An alias to self.labels</span>
|
||||
|
||||
<span class="sd"> :return: self.labels</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">p</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An alias to self.prevalence()</span>
|
||||
|
||||
<span class="sd"> :return: self.prevalence()</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.stats">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.stats">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns (and optionally prints) a dictionary with some stats of this collection. E.g.:</span>
|
||||
|
||||
<span class="sd"> >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)</span>
|
||||
<span class="sd"> >>> data.training.stats()</span>
|
||||
<span class="sd"> >>> #instances=3821, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], prevs=[0.081, 0.919]</span>
|
||||
|
||||
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
|
||||
<span class="sd"> :return: a dictionary containing some stats of this collection. Keys include `#instances` (the number of</span>
|
||||
<span class="sd"> instances), `type` (the type representing the instances), `#features` (the number of features, if the</span>
|
||||
<span class="sd"> instances are in array-like format), `#classes` (the classes of the collection), `prevs` (the prevalence</span>
|
||||
<span class="sd"> values for each class)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">ninstances</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
||||
<span class="n">instance_type</span> <span class="o">=</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="nb">list</span><span class="p">:</span>
|
||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="k">elif</span> <span class="n">instance_type</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span> <span class="ow">or</span> <span class="n">issparse</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">nfeats</span> <span class="o">=</span> <span class="s1">'?'</span>
|
||||
<span class="n">stats_</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'instances'</span><span class="p">:</span> <span class="n">ninstances</span><span class="p">,</span>
|
||||
<span class="s1">'type'</span><span class="p">:</span> <span class="n">instance_type</span><span class="p">,</span>
|
||||
<span class="s1">'features'</span><span class="p">:</span> <span class="n">nfeats</span><span class="p">,</span>
|
||||
<span class="s1">'classes'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">,</span>
|
||||
<span class="s1">'prevs'</span><span class="p">:</span> <span class="n">strprev</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())}</span>
|
||||
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'#instances=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, type=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"features"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
||||
<span class="sa">f</span><span class="s1">'#classes=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"classes"</span><span class="p">]</span><span class="si">}</span><span class="s1">, prevs=</span><span class="si">{</span><span class="n">stats_</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">stats_</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelledCollection.kFCV">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.LabelledCollection.kFCV">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation.</span>
|
||||
|
||||
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
|
||||
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
|
||||
<span class="sd"> :param random_state: integer or None (default None); if an integer is given, the folds generated are reproducible</span>
|
||||
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">kf</span> <span class="o">=</span> <span class="n">RepeatedStratifiedKFold</span><span class="p">(</span><span class="n">n_splits</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">train_index</span><span class="p">,</span> <span class="n">test_index</span> <span class="ow">in</span> <span class="n">kf</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">Xy</span><span class="p">):</span>
|
||||
<span class="n">train</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">train_index</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">test_index</span><span class="p">)</span>
|
||||
<span class="k">yield</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Dataset">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">Dataset</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstraction of training and test :class:`LabelledCollection` objects.</span>
|
||||
|
||||
<span class="sd"> :param training: a :class:`LabelledCollection` instance</span>
|
||||
<span class="sd"> :param test: a :class:`LabelledCollection` instance</span>
|
||||
<span class="sd"> :param vocabulary: if indicated, is a dictionary of the terms used in this textual dataset</span>
|
||||
<span class="sd"> :param name: a string representing the name of the dataset</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">vocabulary</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">set</span><span class="p">(</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span> <span class="o">==</span> <span class="nb">set</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">'incompatible labels in training and test collections'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">training</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">test</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vocabulary</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
|
||||
|
||||
<div class="viewcode-block" id="Dataset.SplitStratified">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.SplitStratified">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">SplitStratified</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">collection</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">train_size</span><span class="o">=</span><span class="mf">0.6</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates a :class:`Dataset` from a stratified split of a :class:`LabelledCollection` instance.</span>
|
||||
<span class="sd"> See :meth:`LabelledCollection.split_stratified`</span>
|
||||
|
||||
<span class="sd"> :param collection: :class:`LabelledCollection`</span>
|
||||
<span class="sd"> :param train_size: the proportion of training documents (the rest forms the test split)</span>
|
||||
<span class="sd"> :return: an instance of :class:`Dataset`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">collection</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="n">train_size</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The classes according to which the training collection is labelled</span>
|
||||
|
||||
<span class="sd"> :return: The classes according to which the training collection is labelled</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">n_classes</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The number of classes according to which the training collection is labelled</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">n_classes</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns True if the training collection is labelled according to two classes</span>
|
||||
|
||||
<span class="sd"> :return: boolean</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">binary</span>
|
||||
|
||||
<div class="viewcode-block" id="Dataset.load">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.load">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">:</span> <span class="n">callable</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a training set and a test set of labelled data and converts them into a :class:`Dataset` instance.</span>
|
||||
<span class="sd"> The function in charge of reading the instances must be specified. This function can be a custom one, or any of</span>
|
||||
<span class="sd"> the reading functions defined in :mod:`quapy.data.reader` module.</span>
|
||||
|
||||
<span class="sd"> :param train_path: string, the path to the file containing the training instances</span>
|
||||
<span class="sd"> :param test_path: string, the path to the file containing the test instances</span>
|
||||
<span class="sd"> :param loader_func: a custom function that implements the data loader and returns a tuple with instances and</span>
|
||||
<span class="sd"> labels</span>
|
||||
<span class="sd"> :param classes: array-like, the classes according to which the instances are labelled</span>
|
||||
<span class="sd"> :param loader_kwargs: any argument that the `loader_func` function needs in order to read the instances.</span>
|
||||
<span class="sd"> See :meth:`LabelledCollection.load` for further details.</span>
|
||||
<span class="sd"> :return: a :class:`Dataset` object</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">train_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">test_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="p">,</span> <span class="n">classes</span><span class="p">,</span> <span class="o">**</span><span class="n">loader_kwargs</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> If the dataset is textual, and the vocabulary was indicated, returns the size of the vocabulary</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">train_test</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Alias to `self.training` and `self.test`</span>
|
||||
|
||||
<span class="sd"> :return: the training and test collections</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span>
|
||||
|
||||
<div class="viewcode-block" id="Dataset.stats">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.stats">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">stats</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns (and optionally prints) a dictionary with some stats of this dataset. E.g.:</span>
|
||||
|
||||
<span class="sd"> >>> data = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)</span>
|
||||
<span class="sd"> >>> data.stats()</span>
|
||||
<span class="sd"> Dataset=kindle #tr-instances=3821, #te-instances=21591, type=<class 'scipy.sparse.csr.csr_matrix'>, #features=4403, #classes=[0 1], tr-prevs=[0.081, 0.919], te-prevs=[0.063, 0.937]</span>
|
||||
|
||||
<span class="sd"> :param show: if set to True (default), prints the stats in standard output</span>
|
||||
<span class="sd"> :return: a dictionary containing some stats of this collection for the training and test collections. The keys</span>
|
||||
<span class="sd"> are `train` and `test`, and point to dedicated dictionaries of stats, for each collection, with keys</span>
|
||||
<span class="sd"> `instances` (the number of instances), `type` (the type representing the instances),</span>
|
||||
<span class="sd"> `features` (the number of features, if the instances are in array-like format), `classes` (the classes of</span>
|
||||
<span class="sd"> the collection), `prevs` (the prevalence values for each class)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">tr_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">te_stats</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">(</span><span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">show</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s1"> #tr-instances=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #te-instances=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">"instances"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
||||
<span class="sa">f</span><span class="s1">'type=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #features=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"features"</span><span class="p">]</span><span class="si">}</span><span class="s1">, #classes=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"classes"</span><span class="p">]</span><span class="si">}</span><span class="s1">, '</span>
|
||||
<span class="sa">f</span><span class="s1">'tr-prevs=</span><span class="si">{</span><span class="n">tr_stats</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">, te-prevs=</span><span class="si">{</span><span class="n">te_stats</span><span class="p">[</span><span class="s2">"prevs"</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">{</span><span class="s1">'train'</span><span class="p">:</span> <span class="n">tr_stats</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">:</span> <span class="n">te_stats</span><span class="p">}</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Dataset.kFCV">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.kFCV">[docs]</a>
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">kFCV</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generator of stratified folds to be used in k-fold cross validation. This function is only a wrapper around</span>
|
||||
<span class="sd"> :meth:`LabelledCollection.kFCV` that returns :class:`Dataset` instances made of training and test folds.</span>
|
||||
|
||||
<span class="sd"> :param nfolds: integer (default 5), the number of folds to generate</span>
|
||||
<span class="sd"> :param nrepeats: integer (default 1), the number of rounds of k-fold cross validation to run</span>
|
||||
<span class="sd"> :param random_state: integer (default 0), guarantees that the folds generated are reproducible</span>
|
||||
<span class="sd"> :return: yields `nfolds * nrepeats` folds for k-fold cross validation as instances of :class:`Dataset`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">nrepeats</span><span class="o">=</span><span class="n">nrepeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)):</span>
|
||||
<span class="k">yield</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="sa">f</span><span class="s1">'fold </span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">nfolds</span><span class="si">}</span><span class="s1"> (round=</span><span class="si">{</span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">//</span><span class="w"> </span><span class="n">nfolds</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Dataset.reduce">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.base.Dataset.reduce">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">reduce</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n_train</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_test</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Reduce the number of instances in place for quick experiments. Preserves the prevalence of each set.</span>
|
||||
|
||||
<span class="sd"> :param n_train: number of training documents to keep (default 100)</span>
|
||||
<span class="sd"> :param n_test: number of test documents to keep (default 100)</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_train</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="n">n_test</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">())</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
</div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,919 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data.datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data.datasets</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data.datasets</h1><div class="highlight"><pre>
|
||||
<div class="viewcode-block" id="warn"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.warn">[docs]</a><span></span><span class="k">def</span> <span class="nf">warn</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="k">pass</span></div>
|
||||
<span class="kn">import</span> <span class="nn">warnings</span>
|
||||
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span> <span class="o">=</span> <span class="n">warn</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">zipfile</span>
|
||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">join</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
<span class="kn">from</span> <span class="nn">ucimlrepo</span> <span class="kn">import</span> <span class="n">fetch_ucirepo</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.preprocessing</span> <span class="kn">import</span> <span class="n">text2tfidf</span><span class="p">,</span> <span class="n">reduce_columns</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.reader</span> <span class="kn">import</span> <span class="o">*</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">download_file_if_not_exists</span><span class="p">,</span> <span class="n">download_file</span><span class="p">,</span> <span class="n">get_quapy_home</span><span class="p">,</span> <span class="n">pickled_resource</span>
|
||||
|
||||
|
||||
<span class="n">REVIEWS_SENTIMENT_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'hp'</span><span class="p">,</span> <span class="s1">'kindle'</span><span class="p">,</span> <span class="s1">'imdb'</span><span class="p">]</span>
|
||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'gasp'</span><span class="p">,</span> <span class="s1">'hcr'</span><span class="p">,</span> <span class="s1">'omd'</span><span class="p">,</span> <span class="s1">'sanders'</span><span class="p">,</span>
|
||||
<span class="s1">'semeval13'</span><span class="p">,</span> <span class="s1">'semeval14'</span><span class="p">,</span> <span class="s1">'semeval15'</span><span class="p">,</span> <span class="s1">'semeval16'</span><span class="p">,</span>
|
||||
<span class="s1">'sst'</span><span class="p">,</span> <span class="s1">'wa'</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">]</span>
|
||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'gasp'</span><span class="p">,</span> <span class="s1">'hcr'</span><span class="p">,</span> <span class="s1">'omd'</span><span class="p">,</span> <span class="s1">'sanders'</span><span class="p">,</span>
|
||||
<span class="s1">'semeval'</span><span class="p">,</span> <span class="s1">'semeval16'</span><span class="p">,</span>
|
||||
<span class="s1">'sst'</span><span class="p">,</span> <span class="s1">'wa'</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">]</span>
|
||||
<span class="n">UCI_BINARY_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'acute.a'</span><span class="p">,</span> <span class="s1">'acute.b'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.1'</span><span class="p">,</span> <span class="s1">'balance.2'</span><span class="p">,</span> <span class="s1">'balance.3'</span><span class="p">,</span>
|
||||
<span class="s1">'breast-cancer'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.1'</span><span class="p">,</span> <span class="s1">'cmc.2'</span><span class="p">,</span> <span class="s1">'cmc.3'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.1'</span><span class="p">,</span> <span class="s1">'ctg.2'</span><span class="p">,</span> <span class="s1">'ctg.3'</span><span class="p">,</span>
|
||||
<span class="c1">#'diabetes', # <-- I haven't found this one...</span>
|
||||
<span class="s1">'german'</span><span class="p">,</span>
|
||||
<span class="s1">'haberman'</span><span class="p">,</span>
|
||||
<span class="s1">'ionosphere'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.1'</span><span class="p">,</span> <span class="s1">'iris.2'</span><span class="p">,</span> <span class="s1">'iris.3'</span><span class="p">,</span>
|
||||
<span class="s1">'mammographic'</span><span class="p">,</span>
|
||||
<span class="s1">'pageblocks.5'</span><span class="p">,</span>
|
||||
<span class="c1">#'phoneme', # <-- I haven't found this one...</span>
|
||||
<span class="s1">'semeion'</span><span class="p">,</span>
|
||||
<span class="s1">'sonar'</span><span class="p">,</span>
|
||||
<span class="s1">'spambase'</span><span class="p">,</span>
|
||||
<span class="s1">'spectf'</span><span class="p">,</span>
|
||||
<span class="s1">'tictactoe'</span><span class="p">,</span>
|
||||
<span class="s1">'transfusion'</span><span class="p">,</span>
|
||||
<span class="s1">'wdbc'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.1'</span><span class="p">,</span> <span class="s1">'wine.2'</span><span class="p">,</span> <span class="s1">'wine.3'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-q-red'</span><span class="p">,</span> <span class="s1">'wine-q-white'</span><span class="p">,</span>
|
||||
<span class="s1">'yeast'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">UCI_MULTICLASS_DATASETS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'dry-bean'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-quality'</span><span class="p">,</span>
|
||||
<span class="s1">'academic-success'</span><span class="p">,</span>
|
||||
<span class="s1">'digits'</span><span class="p">,</span>
|
||||
<span class="s1">'letter'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">LEQUA2022_TASKS</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'T1A'</span><span class="p">,</span> <span class="s1">'T1B'</span><span class="p">,</span> <span class="s1">'T2A'</span><span class="p">,</span> <span class="s1">'T2B'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">_TXA_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">250</span>
|
||||
<span class="n">_TXB_SAMPLE_SIZE</span> <span class="o">=</span> <span class="mi">1000</span>
|
||||
|
||||
<span class="n">LEQUA2022_SAMPLE_SIZE</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'TXA'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'TXB'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'T1A'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'T1B'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'T2A'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'T2B'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'binary'</span><span class="p">:</span> <span class="n">_TXA_SAMPLE_SIZE</span><span class="p">,</span>
|
||||
<span class="s1">'multiclass'</span><span class="p">:</span> <span class="n">_TXB_SAMPLE_SIZE</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_reviews"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_reviews">[docs]</a><span class="k">def</span> <span class="nf">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a Reviews dataset as a Dataset instance, as used in</span>
|
||||
<span class="sd"> `Esuli, A., Moreo, A., and Sebastiani, F. "A recurrent neural network for sentiment quantification."</span>
|
||||
<span class="sd"> Proceedings of the 27th ACM International Conference on Information and Knowledge Management. 2018. <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_.</span>
|
||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.REVIEWS_SENTIMENT_DATASETS`</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are 'hp', 'kindle', 'imdb'</span>
|
||||
<span class="sd"> :param tfidf: set to True to transform the raw documents into tfidf weighted matrices</span>
|
||||
<span class="sd"> :param min_df: minimum number of documents that should contain a term in order for the term to be</span>
|
||||
<span class="sd"> kept (ignored if tfidf==False)</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
|
||||
<span class="sd"> faster subsequent invocations</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment reviews. '</span> \
|
||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">URL_TRAIN</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_train.txt'</span>
|
||||
<span class="n">URL_TEST</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'https://zenodo.org/record/4117827/files/</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">_test.txt'</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">train_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">'train.txt'</span><span class="p">)</span>
|
||||
<span class="n">test_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="p">,</span> <span class="s1">'test.txt'</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TRAIN</span><span class="p">,</span> <span class="n">train_path</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">URL_TEST</span><span class="p">,</span> <span class="n">test_path</span><span class="p">)</span>
|
||||
|
||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
|
||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'reviews'</span><span class="p">,</span> <span class="s1">'pickle'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">.pkl'</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train_path</span><span class="p">,</span> <span class="n">test_path</span><span class="p">,</span> <span class="n">from_text</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">tfidf</span><span class="p">:</span>
|
||||
<span class="n">text2tfidf</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_twitter"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_twitter">[docs]</a><span class="k">def</span> <span class="nf">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a Twitter dataset as a :class:`quapy.data.base.Dataset` instance, as used in:</span>
|
||||
<span class="sd"> `Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.</span>
|
||||
<span class="sd"> Social Network Analysis and Mining 6(19), 1–22 (2016) <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_</span>
|
||||
<span class="sd"> Note that the datasets 'semeval13', 'semeval14', 'semeval15' share the same training set.</span>
|
||||
<span class="sd"> The list of valid dataset names corresponding to training sets can be accessed in</span>
|
||||
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN`, while the test sets can be accessed in</span>
|
||||
<span class="sd"> `quapy.data.datasets.TWITTER_SENTIMENT_DATASETS_TEST`</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: the name of the dataset: valid ones are 'gasp', 'hcr', 'omd', 'sanders', 'semeval13',</span>
|
||||
<span class="sd"> 'semeval14', 'semeval15', 'semeval16', 'sst', 'wa', 'wb'</span>
|
||||
<span class="sd"> :param for_model_selection: if True, then returns the train split as the training set and the devel split</span>
|
||||
<span class="sd"> as the test set; if False, then returns the train+devel split as the training set and the test set as the</span>
|
||||
<span class="sd"> test set</span>
|
||||
<span class="sd"> :param min_df: minimum number of documents that should contain a term in order for the term to be kept</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param pickle: set to True to pickle the Dataset object the first time it is generated, in order to allow for</span>
|
||||
<span class="sd"> faster subsequent invocations</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset for sentiment twitter. '</span> \
|
||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="si">}</span><span class="s1"> for model selection and '</span> \
|
||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="si">}</span><span class="s1"> for test (datasets "semeval14", "semeval15", "semeval16" share '</span> \
|
||||
<span class="sa">f</span><span class="s1">'a common training set "semeval")'</span>
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">URL</span> <span class="o">=</span> <span class="s1">'https://zenodo.org/record/4255764/files/tweet_sentiment_quantification_snam.zip'</span>
|
||||
<span class="n">unzipped_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'tweet_sentiment_quantification_snam'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">):</span>
|
||||
<span class="n">downloaded_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'tweet_sentiment_quantification_snam.zip'</span><span class="p">)</span>
|
||||
<span class="n">download_file</span><span class="p">(</span><span class="n">URL</span><span class="p">,</span> <span class="n">downloaded_path</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">data_home</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">downloaded_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="p">{</span><span class="s1">'semeval13'</span><span class="p">,</span> <span class="s1">'semeval14'</span><span class="p">,</span> <span class="s1">'semeval15'</span><span class="p">}:</span>
|
||||
<span class="n">trainset_name</span> <span class="o">=</span> <span class="s1">'semeval'</span>
|
||||
<span class="n">testset_name</span> <span class="o">=</span> <span class="s1">'semeval'</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="n">dataset_name</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "</span>
|
||||
<span class="sa">f</span><span class="s2">"(called 'semeval'); returning trainin-set='</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s2">' and test-set=</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval'</span> <span class="ow">and</span> <span class="n">for_model_selection</span><span class="o">==</span><span class="kc">False</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'dataset "semeval" can only be used for model selection. '</span>
|
||||
<span class="s1">'Use "semeval13", "semeval14", or "semeval15" for model evaluation.'</span><span class="p">)</span>
|
||||
<span class="n">trainset_name</span> <span class="o">=</span> <span class="n">testset_name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
|
||||
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'train'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train.feature.txt'</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev.feature.txt'</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">train</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'train'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">trainset_name</span><span class="si">}</span><span class="s1">.train+dev.feature.txt'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval16'</span><span class="p">:</span> <span class="c1"># there is a different test name in the case of semeval16 only</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.dev-test.feature.txt'</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'test'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.test.feature.txt'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">pickle</span><span class="p">:</span>
|
||||
<span class="n">mode</span> <span class="o">=</span> <span class="s2">"train-dev"</span> <span class="k">if</span> <span class="n">for_model_selection</span> <span class="k">else</span> <span class="s2">"train+dev-test"</span>
|
||||
<span class="n">pickle_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="s1">'pickle'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">testset_name</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">mode</span><span class="si">}</span><span class="s1">.pkl'</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="n">Dataset</span><span class="o">.</span><span class="n">load</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">from_sparse</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">min_df</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">reduce_columns</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">dataset_name</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_UCIBinaryDataset"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryDataset">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a UCI dataset as an instance of :class:`quapy.data.base.Dataset`, as used in</span>
|
||||
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).</span>
|
||||
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
|
||||
<span class="sd"> Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
||||
<span class="sd"> and</span>
|
||||
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).</span>
|
||||
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
|
||||
<span class="sd"> Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
<span class="sd"> The datasets do not come with a predefined train-test split (see :meth:`fetch_UCIBinaryLabelledCollection` for further</span>
|
||||
<span class="sd"> information on how to use these collections), and so a train-test split is generated at desired proportion.</span>
|
||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_BINARY_DATASETS`</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest forms the training set</span>
|
||||
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_UCIBinaryLabelledCollection"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIBinaryLabelledCollection">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIBinaryLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">LabelledCollection</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a UCI collection as an instance of :class:`quapy.data.base.LabelledCollection`, as used in</span>
|
||||
<span class="sd"> `Pérez-Gállego, P., Quevedo, J. R., & del Coz, J. J. (2017).</span>
|
||||
<span class="sd"> Using ensembles for problems with characterizable changes in data distribution: A case study on quantification.</span>
|
||||
<span class="sd"> Information Fusion, 34, 87-100. <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
||||
<span class="sd"> and</span>
|
||||
<span class="sd"> `Pérez-Gállego, P., Castano, A., Quevedo, J. R., & del Coz, J. J. (2019).</span>
|
||||
<span class="sd"> Dynamic ensemble selection for quantification tasks.</span>
|
||||
<span class="sd"> Information Fusion, 45, 1-15. <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
<span class="sd"> The datasets do not come with a predefined train-test split, and so Pérez-Gállego et al. adopted a 5FCVx2 evaluation</span>
|
||||
<span class="sd"> protocol, meaning that each collection was used to generate two rounds (hence the x2) of 5 fold cross validation.</span>
|
||||
<span class="sd"> This can be reproduced by using :meth:`quapy.data.base.Dataset.kFCV`, e.g.:</span>
|
||||
|
||||
<span class="sd"> >>> import quapy as qp</span>
|
||||
<span class="sd"> >>> collection = qp.datasets.fetch_UCIBinaryLabelledCollection("yeast")</span>
|
||||
<span class="sd"> >>> for data in qp.data.base.Dataset.kFCV(collection, nfolds=5, nrepeats=2):</span>
|
||||
<span class="sd"> >>> ...</span>
|
||||
|
||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_BINARY_DATASETS`</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
|
||||
<span class="sd"> :param verbose: set to True (default is False) to get information (from the UCI ML repository) about the datasets</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the UCI Machine Learning datasets repository. '</span> \
|
||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">UCI_BINARY_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">dataset_fullname</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'acute.a'</span><span class="p">:</span> <span class="s1">'Acute Inflammations (urinary bladder)'</span><span class="p">,</span>
|
||||
<span class="s1">'acute.b'</span><span class="p">:</span> <span class="s1">'Acute Inflammations (renal pelvis)'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.1'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (left)'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.2'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (balanced)'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.3'</span><span class="p">:</span> <span class="s1">'Balance Scale Weight & Distance Database (right)'</span><span class="p">,</span>
|
||||
<span class="s1">'breast-cancer'</span><span class="p">:</span> <span class="s1">'Breast Cancer Wisconsin (Original)'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.1'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (no use)'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.2'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (long term)'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.3'</span><span class="p">:</span> <span class="s1">'Contraceptive Method Choice (short term)'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.1'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (normal)'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.2'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (suspect)'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.3'</span><span class="p">:</span> <span class="s1">'Cardiotocography Data Set (pathologic)'</span><span class="p">,</span>
|
||||
<span class="s1">'german'</span><span class="p">:</span> <span class="s1">'Statlog German Credit Data'</span><span class="p">,</span>
|
||||
<span class="s1">'haberman'</span><span class="p">:</span> <span class="s2">"Haberman's Survival Data"</span><span class="p">,</span>
|
||||
<span class="s1">'ionosphere'</span><span class="p">:</span> <span class="s1">'Johns Hopkins University Ionosphere DB'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.1'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(x)'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.2'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(versicolour)'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.3'</span><span class="p">:</span> <span class="s1">'Iris Plants Database(virginica)'</span><span class="p">,</span>
|
||||
<span class="s1">'mammographic'</span><span class="p">:</span> <span class="s1">'Mammographic Mass'</span><span class="p">,</span>
|
||||
<span class="s1">'pageblocks.5'</span><span class="p">:</span> <span class="s1">'Page Blocks Classification (5)'</span><span class="p">,</span>
|
||||
<span class="s1">'semeion'</span><span class="p">:</span> <span class="s1">'Semeion Handwritten Digit (8)'</span><span class="p">,</span>
|
||||
<span class="s1">'sonar'</span><span class="p">:</span> <span class="s1">'Sonar, Mines vs. Rocks'</span><span class="p">,</span>
|
||||
<span class="s1">'spambase'</span><span class="p">:</span> <span class="s1">'Spambase Data Set'</span><span class="p">,</span>
|
||||
<span class="s1">'spectf'</span><span class="p">:</span> <span class="s1">'SPECTF Heart Data'</span><span class="p">,</span>
|
||||
<span class="s1">'tictactoe'</span><span class="p">:</span> <span class="s1">'Tic-Tac-Toe Endgame Database'</span><span class="p">,</span>
|
||||
<span class="s1">'transfusion'</span><span class="p">:</span> <span class="s1">'Blood Transfusion Service Center Data Set'</span><span class="p">,</span>
|
||||
<span class="s1">'wdbc'</span><span class="p">:</span> <span class="s1">'Wisconsin Diagnostic Breast Cancer'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.1'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (1)'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.2'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (2)'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.3'</span><span class="p">:</span> <span class="s1">'Wine Recognition Data (3)'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-q-red'</span><span class="p">:</span> <span class="s1">'Wine Quality Red (6-10)'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-q-white'</span><span class="p">:</span> <span class="s1">'Wine Quality White (6-10)'</span><span class="p">,</span>
|
||||
<span class="s1">'yeast'</span><span class="p">:</span> <span class="s1">'Yeast'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># the identifier is an alias for the dataset group, it's part of the url data-folder, and is the name we use</span>
|
||||
<span class="c1"># to download the raw dataset</span>
|
||||
<span class="n">identifier_map</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'acute.a'</span><span class="p">:</span> <span class="s1">'acute'</span><span class="p">,</span>
|
||||
<span class="s1">'acute.b'</span><span class="p">:</span> <span class="s1">'acute'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.1'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.2'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
||||
<span class="s1">'balance.3'</span><span class="p">:</span> <span class="s1">'balance-scale'</span><span class="p">,</span>
|
||||
<span class="s1">'breast-cancer'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.1'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.2'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
||||
<span class="s1">'cmc.3'</span><span class="p">:</span> <span class="s1">'cmc'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.1'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.2'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
||||
<span class="s1">'ctg.3'</span><span class="p">:</span> <span class="s1">'00193'</span><span class="p">,</span>
|
||||
<span class="s1">'german'</span><span class="p">:</span> <span class="s1">'statlog/german'</span><span class="p">,</span>
|
||||
<span class="s1">'haberman'</span><span class="p">:</span> <span class="s1">'haberman'</span><span class="p">,</span>
|
||||
<span class="s1">'ionosphere'</span><span class="p">:</span> <span class="s1">'ionosphere'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.1'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.2'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
||||
<span class="s1">'iris.3'</span><span class="p">:</span> <span class="s1">'iris'</span><span class="p">,</span>
|
||||
<span class="s1">'mammographic'</span><span class="p">:</span> <span class="s1">'mammographic-masses'</span><span class="p">,</span>
|
||||
<span class="s1">'pageblocks.5'</span><span class="p">:</span> <span class="s1">'page-blocks'</span><span class="p">,</span>
|
||||
<span class="s1">'semeion'</span><span class="p">:</span> <span class="s1">'semeion'</span><span class="p">,</span>
|
||||
<span class="s1">'sonar'</span><span class="p">:</span> <span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">,</span>
|
||||
<span class="s1">'spambase'</span><span class="p">:</span> <span class="s1">'spambase'</span><span class="p">,</span>
|
||||
<span class="s1">'spectf'</span><span class="p">:</span> <span class="s1">'spect'</span><span class="p">,</span>
|
||||
<span class="s1">'tictactoe'</span><span class="p">:</span> <span class="s1">'tic-tac-toe'</span><span class="p">,</span>
|
||||
<span class="s1">'transfusion'</span><span class="p">:</span> <span class="s1">'blood-transfusion'</span><span class="p">,</span>
|
||||
<span class="s1">'wdbc'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-q-red'</span><span class="p">:</span> <span class="s1">'wine-quality'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-q-white'</span><span class="p">:</span> <span class="s1">'wine-quality'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.1'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.2'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
||||
<span class="s1">'wine.3'</span><span class="p">:</span> <span class="s1">'wine'</span><span class="p">,</span>
|
||||
<span class="s1">'yeast'</span><span class="p">:</span> <span class="s1">'yeast'</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># the filename is the name of the file within the data_folder indexed by the identifier</span>
|
||||
<span class="n">file_name</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'acute'</span><span class="p">:</span> <span class="s1">'diagnosis.data'</span><span class="p">,</span>
|
||||
<span class="s1">'00193'</span><span class="p">:</span> <span class="s1">'CTG.xls'</span><span class="p">,</span>
|
||||
<span class="s1">'statlog/german'</span><span class="p">:</span> <span class="s1">'german.data-numeric'</span><span class="p">,</span>
|
||||
<span class="s1">'mammographic-masses'</span><span class="p">:</span> <span class="s1">'mammographic_masses.data'</span><span class="p">,</span>
|
||||
<span class="s1">'page-blocks'</span><span class="p">:</span> <span class="s1">'page-blocks.data.Z'</span><span class="p">,</span>
|
||||
<span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span> <span class="s1">'sonar.all-data'</span><span class="p">,</span>
|
||||
<span class="s1">'spect'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'SPECTF.train'</span><span class="p">,</span> <span class="s1">'SPECTF.test'</span><span class="p">],</span>
|
||||
<span class="s1">'blood-transfusion'</span><span class="p">:</span> <span class="s1">'transfusion.data'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-quality'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'winequality-red.csv'</span><span class="p">,</span> <span class="s1">'winequality-white.csv'</span><span class="p">],</span>
|
||||
<span class="s1">'breast-cancer-wisconsin'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin.data'</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'breast-cancer'</span> <span class="k">else</span> <span class="s1">'wdbc.data'</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="c1"># the filename containing the dataset description (if any)</span>
|
||||
<span class="n">desc_name</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'acute'</span><span class="p">:</span> <span class="s1">'diagnosis.names'</span><span class="p">,</span>
|
||||
<span class="s1">'00193'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="s1">'statlog/german'</span><span class="p">:</span> <span class="s1">'german.doc'</span><span class="p">,</span>
|
||||
<span class="s1">'mammographic-masses'</span><span class="p">:</span> <span class="s1">'mammographic_masses.names'</span><span class="p">,</span>
|
||||
<span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span> <span class="s1">'sonar.names'</span><span class="p">,</span>
|
||||
<span class="s1">'spect'</span><span class="p">:</span> <span class="s1">'SPECTF.names'</span><span class="p">,</span>
|
||||
<span class="s1">'blood-transfusion'</span><span class="p">:</span> <span class="s1">'transfusion.names'</span><span class="p">,</span>
|
||||
<span class="s1">'wine-quality'</span><span class="p">:</span> <span class="s1">'winequality.names'</span><span class="p">,</span>
|
||||
<span class="s1">'breast-cancer-wisconsin'</span><span class="p">:</span> <span class="s1">'breast-cancer-wisconsin.names'</span> <span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'breast-cancer'</span> <span class="k">else</span> <span class="s1">'wdbc.names'</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifier_map</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
||||
<span class="n">filename</span> <span class="o">=</span> <span class="n">file_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.data'</span><span class="p">)</span>
|
||||
<span class="n">descfile</span> <span class="o">=</span> <span class="n">desc_name</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">identifier</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">.names'</span><span class="p">)</span>
|
||||
<span class="n">fullname</span> <span class="o">=</span> <span class="n">dataset_fullname</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
||||
|
||||
<span class="n">URL</span> <span class="o">=</span> <span class="sa">f</span><span class="s1">'http://archive.ics.uci.edu/ml/machine-learning-databases/</span><span class="si">{</span><span class="n">identifier</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="n">data_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'uci_datasets'</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">filename</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> <span class="c1"># filename could be a list of files, in which case it will be processed later</span>
|
||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">descfile</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">data_dir</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">descfile</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
|
||||
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'could not read the description file'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'no file description available'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Loading </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'acute'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-16'</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">float</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">','</span><span class="p">,</span> <span class="s1">'.'</span><span class="p">)))</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="p">[</span><span class="n">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">)]</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'acute.a'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">6</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'yes'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'acute.b'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">7</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'yes'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'balance-scale'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.1'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'L'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.2'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'B'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'balance.3'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'R'</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'breast-cancer-wisconsin'</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'breast-cancer'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span>
|
||||
<span class="n">Xy</span><span class="p">[</span><span class="n">Xy</span><span class="o">==</span><span class="s1">'?'</span><span class="p">]</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span>
|
||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'breast-cancer-wisconsin'</span> <span class="ow">and</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'wdbc'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">:</span><span class="mi">32</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'M'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'cmc'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">8</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.1'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.2'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'cmc.3'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'00193'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_excel</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">'Data'</span><span class="p">,</span> <span class="n">skipfooter</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span><span class="mi">24</span><span class="p">))]</span> <span class="c1"># select columns numbered (number 23 is the target label)</span>
|
||||
<span class="c1"># replaces the header with the first row</span>
|
||||
<span class="n">new_header</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="c1"># grab the first row for the header</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span> <span class="c1"># take the data less the header row</span>
|
||||
<span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">new_header</span> <span class="c1"># set the header row as the df header</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">22</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="s1">'NSP'</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.1'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="c1"># 1==Normal</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.2'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="c1"># 2==Suspect</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'ctg.3'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span> <span class="c1"># 3==Pathologic</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'statlog/german'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">24</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'haberman'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">int</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'ionosphere'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">34</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'b'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'iris'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.1'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-setosa'</span><span class="p">)</span> <span class="c1"># 1==Setosa</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.2'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-versicolor'</span><span class="p">)</span> <span class="c1"># 2==Versicolor</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'iris.3'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'Iris-virginica'</span><span class="p">)</span> <span class="c1"># 3==Virginica</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'mammographic-masses'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">df</span><span class="p">[</span><span class="n">df</span> <span class="o">==</span> <span class="s1">'?'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
|
||||
<span class="n">Xy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">Xy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span><span class="mi">5</span><span class="p">],</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'page-blocks'</span><span class="p">:</span>
|
||||
<span class="n">data_path_</span> <span class="o">=</span> <span class="n">data_path</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'.Z'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">data_path_</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Warning: file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> does not exist. If this is the first time you '</span>
|
||||
<span class="sa">f</span><span class="s1">'attempt to load this dataset, then you have to manually unzip the </span><span class="si">{</span><span class="n">data_path</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'and name the extracted file </span><span class="si">{</span><span class="n">data_path_</span><span class="si">}</span><span class="s1"> (unfortunately, neither zipfile, nor '</span>
|
||||
<span class="sa">f</span><span class="s1">'gzip can handle unix compressed files automatically -- there is a repo in GitHub '</span>
|
||||
<span class="sa">f</span><span class="s1">'https://github.com/umeat/unlzw where the problem seems to be solved anyway).'</span><span class="p">)</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path_</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">10</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># 5==block "graphic"</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'semeion'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span> <span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">256</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">263</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="c1"># 263 stands for digit 8 (labels are one-hot vectors from col 256-266)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'undocumented/connectionist-bench/sonar'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">60</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'R'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'spambase'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">57</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'spect'</span><span class="p">:</span>
|
||||
<span class="n">dfs</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">file</span> <span class="ow">in</span> <span class="n">filename</span><span class="p">:</span>
|
||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">file</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
||||
<span class="n">dfs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">))</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">dfs</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">45</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'tic-tac-toe'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'o'</span><span class="p">,</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'b'</span><span class="p">,</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'x'</span><span class="p">,</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'negative'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'blood-transfusion'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">4</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'wine'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">14</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.1'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.2'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'wine.3'</span><span class="p">:</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'wine-quality'</span><span class="p">:</span>
|
||||
<span class="n">filename</span> <span class="o">=</span> <span class="n">filename</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">if</span> <span class="n">dataset_name</span><span class="o">==</span><span class="s1">'wine-q-red'</span> <span class="k">else</span> <span class="n">filename</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">data_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_dir</span><span class="p">,</span> <span class="n">filename</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">URL</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">filename</span><span class="si">}</span><span class="s1">'</span><span class="p">,</span> <span class="n">data_path</span><span class="p">)</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s1">';'</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">11</span><span class="p">]</span><span class="o">.</span><span class="n">values</span> <span class="o">></span> <span class="mi">5</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">identifier</span> <span class="o">==</span> <span class="s1">'yeast'</span><span class="p">:</span>
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">data_path</span><span class="p">,</span> <span class="n">header</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">delim_whitespace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">)</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">values</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="s1">'NUC'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">data</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_UCIMulticlassDataset"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassDataset">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">test_split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dataset</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a UCI multiclass dataset as an instance of :class:`quapy.data.base.Dataset`. </span>
|
||||
|
||||
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
|
||||
<span class="sd"> - It has more than 1000 instances</span>
|
||||
<span class="sd"> - It is suited for classification</span>
|
||||
<span class="sd"> - It has more than two classes</span>
|
||||
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
|
||||
|
||||
<span class="sd"> >>> import quapy as qp</span>
|
||||
<span class="sd"> >>> dataset = qp.datasets.fetch_UCIMulticlassDataset("dry-bean")</span>
|
||||
<span class="sd"> >>> train, test = dataset.train_test</span>
|
||||
<span class="sd"> >>> ...</span>
|
||||
|
||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
|
||||
|
||||
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest forms the training set</span>
|
||||
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.Dataset` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="o">*</span><span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">test_split</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_UCIMulticlassLabelledCollection"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_UCIMulticlassLabelledCollection">[docs]</a><span class="k">def</span> <span class="nf">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="o">-></span> <span class="n">LabelledCollection</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads a UCI multiclass collection as an instance of :class:`quapy.data.base.LabelledCollection`.</span>
|
||||
|
||||
<span class="sd"> The list of available datasets is taken from https://archive.ics.uci.edu/, following these criteria:</span>
|
||||
<span class="sd"> - It has more than 1000 instances</span>
|
||||
<span class="sd"> - It is suited for classification</span>
|
||||
<span class="sd"> - It has more than two classes</span>
|
||||
<span class="sd"> - It is available for Python import (requires ucimlrepo package)</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> >>> import quapy as qp</span>
|
||||
<span class="sd"> >>> collection = qp.datasets.fetch_UCIMulticlassLabelledCollection("dry-bean")</span>
|
||||
<span class="sd"> >>> X, y = collection.Xy</span>
|
||||
<span class="sd"> >>> ...</span>
|
||||
|
||||
<span class="sd"> The list of valid dataset names can be accessed in `quapy.data.datasets.UCI_MULTICLASS_DATASETS`</span>
|
||||
|
||||
<span class="sd"> The datasets are downloaded only once and pickled into disk, saving time for consecutive calls.</span>
|
||||
|
||||
<span class="sd"> :param dataset_name: a dataset name</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where the dataset will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :param test_split: proportion of documents to be included in the test set. The rest conforms the training set</span>
|
||||
<span class="sd"> :param verbose: set to True (default is False) to get information (stats) about the dataset</span>
|
||||
<span class="sd"> :return: a :class:`quapy.data.base.LabelledCollection` instance</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">dataset_name</span> <span class="ow">in</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'Name </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> does not match any known dataset from the '</span> \
|
||||
<span class="sa">f</span><span class="s1">'UCI Machine Learning datasets repository (multiclass). '</span> \
|
||||
<span class="sa">f</span><span class="s1">'Valid ones are </span><span class="si">{</span><span class="n">UCI_MULTICLASS_DATASETS</span><span class="si">}</span><span class="s1">'</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">identifiers</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"dry-bean"</span><span class="p">:</span> <span class="mi">602</span><span class="p">,</span>
|
||||
<span class="s2">"wine-quality"</span><span class="p">:</span> <span class="mi">186</span><span class="p">,</span>
|
||||
<span class="s2">"academic-success"</span><span class="p">:</span> <span class="mi">697</span><span class="p">,</span>
|
||||
<span class="s2">"digits"</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span>
|
||||
<span class="s2">"letter"</span><span class="p">:</span> <span class="mi">59</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">full_names</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s2">"dry-bean"</span><span class="p">:</span> <span class="s2">"Dry Bean Dataset"</span><span class="p">,</span>
|
||||
<span class="s2">"wine-quality"</span><span class="p">:</span> <span class="s2">"Wine Quality"</span><span class="p">,</span>
|
||||
<span class="s2">"academic-success"</span><span class="p">:</span> <span class="s2">"Predict students' dropout and academic success"</span><span class="p">,</span>
|
||||
<span class="s2">"digits"</span><span class="p">:</span> <span class="s2">"Optical Recognition of Handwritten Digits"</span><span class="p">,</span>
|
||||
<span class="s2">"letter"</span><span class="p">:</span> <span class="s2">"Letter Recognition"</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">identifier</span> <span class="o">=</span> <span class="n">identifiers</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
||||
<span class="n">fullname</span> <span class="o">=</span> <span class="n">full_names</span><span class="p">[</span><span class="n">dataset_name</span><span class="p">]</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Loading UCI Muticlass </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1"> (</span><span class="si">{</span><span class="n">fullname</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">file</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'uci_multiclass'</span><span class="p">,</span> <span class="n">dataset_name</span><span class="o">+</span><span class="s1">'.pkl'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">download</span><span class="p">(</span><span class="nb">id</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">fetch_ucirepo</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="nb">id</span><span class="p">)</span>
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'features'</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">(),</span> <span class="n">data</span><span class="p">[</span><span class="s1">'data'</span><span class="p">][</span><span class="s1">'targets'</span><span class="p">]</span><span class="o">.</span><span class="n">to_numpy</span><span class="p">()</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span>
|
||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">searchsorted</span><span class="p">(</span><span class="n">classes</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">pickled_resource</span><span class="p">(</span><span class="n">file</span><span class="p">,</span> <span class="n">download</span><span class="p">,</span> <span class="n">identifier</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_df_replace</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">col</span><span class="p">,</span> <span class="n">repl</span><span class="o">=</span><span class="p">{</span><span class="s1">'yes'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'no'</span><span class="p">:</span><span class="mi">0</span><span class="p">},</span> <span class="n">astype</span><span class="o">=</span><span class="nb">float</span><span class="p">):</span>
|
||||
<span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span><span class="n">repl</span><span class="p">[</span><span class="n">x</span><span class="p">])</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">astype</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_lequa2022"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_lequa2022">[docs]</a><span class="k">def</span> <span class="nf">fetch_lequa2022</span><span class="p">(</span><span class="n">task</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads the official datasets provided for the `LeQua <https://lequa2022.github.io/index>`_ competition.</span>
|
||||
<span class="sd"> In brief, there are 4 tasks (T1A, T1B, T2A, T2B) having to do with text quantification</span>
|
||||
<span class="sd"> problems. Tasks T1A and T1B provide documents in vector form, while T2A and T2B provide raw documents instead.</span>
|
||||
<span class="sd"> Tasks T1A and T2A are binary sentiment quantification problems, while T1B and T2B are multiclass quantification</span>
|
||||
<span class="sd"> problems consisting of estimating the class prevalence values of 28 different merchandise products.</span>
|
||||
<span class="sd"> We refer to the `Esuli, A., Moreo, A., Sebastiani, F., & Sperduti, G. (2022).</span>
|
||||
<span class="sd"> A Detailed Overview of LeQua@ CLEF 2022: Learning to Quantify.</span>
|
||||
<span class="sd"> <https://ceur-ws.org/Vol-3180/paper-146.pdf>`_ for a detailed description</span>
|
||||
<span class="sd"> on the tasks and datasets.</span>
|
||||
|
||||
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
|
||||
|
||||
<span class="sd"> See `lequa2022_experiments.py` provided in the example folder, that can serve as a guide on how to use these</span>
|
||||
<span class="sd"> datasets.</span>
|
||||
|
||||
|
||||
<span class="sd"> :param task: a string representing the task name; valid ones are T1A, T1B, T2A, and T2B</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :return: a tuple `(train, val_gen, test_gen)` where `train` is an instance of</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, `val_gen` and `test_gen` are instances of</span>
|
||||
<span class="sd"> :class:`quapy.data._lequa2022.SamplesFromDir`, a subclass of :class:`quapy.protocol.AbstractProtocol`,</span>
|
||||
<span class="sd"> that return a series of samples stored in a directory which are labelled by prevalence.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.data._lequa2022</span> <span class="kn">import</span> <span class="n">load_raw_documents</span><span class="p">,</span> <span class="n">load_vector_documents</span><span class="p">,</span> <span class="n">SamplesFromDir</span>
|
||||
|
||||
<span class="k">assert</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'Unknown task </span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">. Valid ones are </span><span class="si">{</span><span class="n">LEQUA2022_TASKS</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">URL_TRAINDEV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.train_dev.zip'</span>
|
||||
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test.zip'</span>
|
||||
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/record/6546188/files/</span><span class="si">{</span><span class="n">task</span><span class="si">}</span><span class="s1">.test_prevalences.zip'</span>
|
||||
|
||||
<span class="n">lequa_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'lequa2022'</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
||||
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span> <span class="o">+</span> <span class="s1">'_tmp.zip'</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">)):</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TRAINDEV</span><span class="p">)</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'T1A'</span><span class="p">,</span> <span class="s1">'T1B'</span><span class="p">]:</span>
|
||||
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_vector_documents</span>
|
||||
<span class="k">elif</span> <span class="n">task</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'T2A'</span><span class="p">,</span> <span class="s1">'T2B'</span><span class="p">]:</span>
|
||||
<span class="n">load_fn</span> <span class="o">=</span> <span class="n">load_raw_documents</span>
|
||||
|
||||
<span class="n">tr_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'training_data.txt'</span><span class="p">)</span>
|
||||
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">tr_path</span><span class="p">,</span> <span class="n">loader_func</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
||||
|
||||
<span class="n">val_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'dev_samples'</span><span class="p">)</span>
|
||||
<span class="n">val_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'dev_prevalences.txt'</span><span class="p">)</span>
|
||||
<span class="n">val_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">val_samples_path</span><span class="p">,</span> <span class="n">val_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
||||
|
||||
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'test_samples'</span><span class="p">)</span>
|
||||
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">lequa_dir</span><span class="p">,</span> <span class="n">task</span><span class="p">,</span> <span class="s1">'public'</span><span class="p">,</span> <span class="s1">'test_prevalences.txt'</span><span class="p">)</span>
|
||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">SamplesFromDir</span><span class="p">(</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_true_prev_path</span><span class="p">,</span> <span class="n">load_fn</span><span class="o">=</span><span class="n">load_fn</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">val_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="fetch_IFCB"><a class="viewcode-back" href="../../../quapy.data.html#quapy.data.datasets.fetch_IFCB">[docs]</a><span class="k">def</span> <span class="nf">fetch_IFCB</span><span class="p">(</span><span class="n">single_sample_train</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">data_home</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Loads the IFCB dataset for quantification from `Zenodo <https://zenodo.org/records/10036244>`_ (for more</span>
|
||||
<span class="sd"> information on this dataset, please follow the zenodo link).</span>
|
||||
<span class="sd"> This dataset is based on the data available publicly at</span>
|
||||
<span class="sd"> `WHOI-Plankton repo <https://github.com/hsosik/WHOI-Plankton>`_.</span>
|
||||
<span class="sd"> The scripts for the processing are available at `P. González's repo <https://github.com/pglez82/IFCB_Zenodo>`_.</span>
|
||||
<span class="sd"> Basically, this is the IFCB dataset with precomputed features for testing quantification algorithms.</span>
|
||||
|
||||
<span class="sd"> The datasets are downloaded only once, and stored for fast reuse.</span>
|
||||
|
||||
<span class="sd"> :param single_sample_train: a boolean. If true, it will return the train dataset as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (all examples together).</span>
|
||||
<span class="sd"> If false, a generator of training samples will be returned. Each example in the training set has an individual label.</span>
|
||||
<span class="sd"> :param for_model_selection: if True, then 30% of the training set (86 out of 286 samples) is held out as the test set, to be used for model selection; </span>
|
||||
<span class="sd"> if False, then returns the full training set as training set and the test set as the test set</span>
|
||||
<span class="sd"> :param data_home: specify the quapy home directory where collections will be dumped (leave empty to use the default</span>
|
||||
<span class="sd"> ~/quapy_data/ directory)</span>
|
||||
<span class="sd"> :return: a tuple `(train, test_gen)` where `train` is an instance of</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, if `single_sample_train` is true or</span>
|
||||
<span class="sd"> :class:`quapy.data._ifcb.IFCBTrainSamplesFromDir`, i.e. a sampling protocol that returns a series of samples</span>
|
||||
<span class="sd"> labelled example by example. test_gen will be a :class:`quapy.data._ifcb.IFCBTestSamples`, </span>
|
||||
<span class="sd"> i.e., a sampling protocol that returns a series of samples labelled by prevalence.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.data._ifcb</span> <span class="kn">import</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">,</span> <span class="n">IFCBTestSamples</span><span class="p">,</span> <span class="n">get_sample_list</span><span class="p">,</span> <span class="n">generate_modelselection_split</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">data_home</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">data_home</span> <span class="o">=</span> <span class="n">get_quapy_home</span><span class="p">()</span>
|
||||
|
||||
<span class="n">URL_TRAIN</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.train.zip'</span>
|
||||
<span class="n">URL_TEST</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.test.zip'</span>
|
||||
<span class="n">URL_TEST_PREV</span><span class="o">=</span><span class="sa">f</span><span class="s1">'https://zenodo.org/records/10036244/files/IFCB.test_prevalences.zip'</span>
|
||||
|
||||
<span class="n">ifcb_dir</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">data_home</span><span class="p">,</span> <span class="s1">'ifcb'</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">download_unzip_and_remove</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">,</span> <span class="n">url</span><span class="p">):</span>
|
||||
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">'ifcb_tmp.zip'</span><span class="p">)</span>
|
||||
<span class="n">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">tmp_path</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">zipfile</span><span class="o">.</span><span class="n">ZipFile</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">file</span><span class="o">.</span><span class="n">extractall</span><span class="p">(</span><span class="n">unzipped_path</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'train'</span><span class="p">)):</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TRAIN</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test'</span><span class="p">)):</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test_prevalences.csv'</span><span class="p">)):</span>
|
||||
<span class="n">download_unzip_and_remove</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="n">URL_TEST_PREV</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Load test prevalences and classes</span>
|
||||
<span class="n">test_true_prev_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span> <span class="s1">'test_prevalences.csv'</span><span class="p">)</span>
|
||||
<span class="n">test_true_prev</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="n">test_true_prev_path</span><span class="p">)</span>
|
||||
<span class="n">classes</span> <span class="o">=</span> <span class="n">test_true_prev</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
|
||||
|
||||
<span class="c1">#Load train and test samples</span>
|
||||
<span class="n">train_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'train'</span><span class="p">)</span>
|
||||
<span class="n">test_samples_path</span> <span class="o">=</span> <span class="n">join</span><span class="p">(</span><span class="n">ifcb_dir</span><span class="p">,</span><span class="s1">'test'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">for_model_selection</span><span class="p">:</span>
|
||||
<span class="c1"># In this case, return 70% of training data as the training set and 30% as the test set</span>
|
||||
<span class="n">samples</span> <span class="o">=</span> <span class="n">get_sample_list</span><span class="p">(</span><span class="n">train_samples_path</span><span class="p">)</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">generate_modelselection_split</span><span class="p">(</span><span class="n">samples</span><span class="p">,</span> <span class="n">split</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
|
||||
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># Test prevalence is computed from class labels</span>
|
||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">samples</span><span class="o">=</span><span class="n">test</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># In this case, we use all training samples as the training set and the test samples as the test set</span>
|
||||
<span class="n">train_gen</span> <span class="o">=</span> <span class="n">IFCBTrainSamplesFromDir</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">train_samples_path</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="n">classes</span><span class="p">)</span>
|
||||
<span class="n">test_gen</span> <span class="o">=</span> <span class="n">IFCBTestSamples</span><span class="p">(</span><span class="n">path_dir</span><span class="o">=</span><span class="n">test_samples_path</span><span class="p">,</span> <span class="n">test_prevalences</span><span class="o">=</span><span class="n">test_true_prev</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># In the case the user wants it, join all the train samples in one LabelledCollection</span>
|
||||
<span class="k">if</span> <span class="n">single_sample_train</span><span class="p">:</span>
|
||||
<span class="n">train</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">lc</span> <span class="k">for</span> <span class="n">lc</span> <span class="ow">in</span> <span class="n">train_gen</span><span class="p">()])</span>
|
||||
<span class="k">return</span> <span class="n">train</span><span class="p">,</span> <span class="n">test_gen</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">train_gen</span><span class="p">,</span> <span class="n">test_gen</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,373 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data.preprocessing — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data.preprocessing</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data.preprocessing</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">spmatrix</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <span class="n">TfidfVectorizer</span><span class="p">,</span> <span class="n">CountVectorizer</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.preprocessing</span> <span class="kn">import</span> <span class="n">StandardScaler</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">Dataset</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">map_parallel</span>
|
||||
<span class="kn">from</span> <span class="nn">.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="text2tfidf">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.text2tfidf">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span><span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Transforms a :class:`quapy.data.base.Dataset` of textual instances into a :class:`quapy.data.base.Dataset` of</span>
|
||||
<span class="sd"> tfidf weighted sparse vectors</span>
|
||||
|
||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` where the instances of training and test collections are</span>
|
||||
<span class="sd"> lists of str</span>
|
||||
<span class="sd"> :param min_df: minimum number of occurrences for a word to be considered as part of the vocabulary (default 3)</span>
|
||||
<span class="sd"> :param sublinear_tf: whether or not to apply the log scaling to the tf counters (default True)</span>
|
||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
||||
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn's</span>
|
||||
<span class="sd"> `TfidfVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html>`_)</span>
|
||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` in `csr_matrix` format (if inplace=False) or a reference to the</span>
|
||||
<span class="sd"> current Dataset (if inplace=True) where the instances are stored in a `csr_matrix` of real-valued tfidf scores</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
||||
|
||||
<span class="n">vectorizer</span> <span class="o">=</span> <span class="n">TfidfVectorizer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="n">sublinear_tf</span><span class="o">=</span><span class="n">sublinear_tf</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="n">training_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">test_documents</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span>
|
||||
<span class="k">return</span> <span class="n">dataset</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_documents</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="reduce_columns">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.reduce_columns">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">reduce_columns</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Reduces the dimensionality of the instances, represented as a `csr_matrix` (or any subtype of</span>
|
||||
<span class="sd"> `scipy.sparse.spmatrix`), of training and test documents by removing the columns of words which are not present</span>
|
||||
<span class="sd"> in at least `min_df` instances in the training set</span>
|
||||
|
||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` in which instances are represented in sparse format (any</span>
|
||||
<span class="sd"> subtype of scipy.sparse.spmatrix)</span>
|
||||
<span class="sd"> :param min_df: integer, minimum number of instances below which the columns are removed</span>
|
||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
|
||||
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) where the dimensions corresponding to infrequent terms</span>
|
||||
<span class="sd"> in the training set have been removed</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">spmatrix</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s1">'unaligned vector spaces'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">filter_by_occurrences</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">W</span><span class="p">):</span>
|
||||
<span class="n">column_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">((</span><span class="n">X</span> <span class="o">></span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="n">take_columns</span> <span class="o">=</span> <span class="n">column_prevalence</span> <span class="o">>=</span> <span class="n">min_df</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
|
||||
<span class="n">W</span> <span class="o">=</span> <span class="n">W</span><span class="p">[:,</span> <span class="n">take_columns</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">W</span>
|
||||
|
||||
<span class="n">Xtr</span><span class="p">,</span> <span class="n">Xte</span> <span class="o">=</span> <span class="n">filter_by_occurrences</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xtr</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">Xte</span>
|
||||
<span class="k">return</span> <span class="n">dataset</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xtr</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">Xte</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="standardize">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.standardize">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">standardize</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Standardizes the real-valued columns of a :class:`quapy.data.base.Dataset`.</span>
|
||||
<span class="sd"> Standardization, aka z-scoring, of a variable `X` comes down to subtracting the average and normalizing by the</span>
|
||||
<span class="sd"> standard deviation.</span>
|
||||
|
||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object</span>
|
||||
<span class="sd"> :param inplace: set to True if the transformation is to be applied inplace, or to False (default) if a new</span>
|
||||
<span class="sd"> :class:`quapy.data.base.Dataset` is to be returned</span>
|
||||
<span class="sd"> :return: an instance of :class:`quapy.data.base.Dataset`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">StandardScaler</span><span class="p">(</span><span class="n">copy</span><span class="o">=</span><span class="ow">not</span> <span class="n">inplace</span><span class="p">)</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">dataset</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="index">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.index">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Indexes the tokens of a textual :class:`quapy.data.base.Dataset` of string documents.</span>
|
||||
<span class="sd"> To index a document means to replace each different token by a unique numerical index.</span>
|
||||
<span class="sd"> Rare words (i.e., words occurring fewer than `min_df` times) are replaced by a special token `UNK`</span>
|
||||
|
||||
<span class="sd"> :param dataset: a :class:`quapy.data.base.Dataset` object where the instances of training and test documents</span>
|
||||
<span class="sd"> are lists of str</span>
|
||||
<span class="sd"> :param min_df: minimum number of occurrences below which the term is replaced by a `UNK` index</span>
|
||||
<span class="sd"> :param inplace: whether or not to apply the transformation inplace (True), or to a new copy (False, default)</span>
|
||||
<span class="sd"> :param kwargs: the rest of parameters of the transformation (as for sklearn's</span>
|
||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_)</span>
|
||||
<span class="sd"> :return: a new :class:`quapy.data.base.Dataset` (if inplace=False) or a reference to the current</span>
|
||||
<span class="sd"> :class:`quapy.data.base.Dataset` (inplace=True) consisting of lists of integer values representing indices.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
||||
<span class="n">__check_type</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
|
||||
|
||||
<span class="n">indexer</span> <span class="o">=</span> <span class="n">IndexTransformer</span><span class="p">(</span><span class="n">min_df</span><span class="o">=</span><span class="n">min_df</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="n">training_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">test_index</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">training_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
||||
<span class="n">test_index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">object</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary</span> <span class="o">=</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span>
|
||||
<span class="k">return</span> <span class="n">dataset</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">training</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">training_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">test_index</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">dataset</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">test</span><span class="p">,</span> <span class="n">indexer</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__check_type</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">element_type</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">container_type</span><span class="p">:</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">,</span> <span class="n">container_type</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'unexpected type of container (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
||||
<span class="k">if</span> <span class="n">element_type</span><span class="p">:</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">container</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">element_type</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'unexpected type of element (expected </span><span class="si">{</span><span class="n">container_type</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">container</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">IndexTransformer</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This class implements a sklearn's-style transformer that indexes text as numerical ids for the tokens it</span>
|
||||
<span class="sd"> contains, and that would be generated by sklearn's</span>
|
||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_</span>
|
||||
|
||||
<span class="sd"> :param kwargs: keyword arguments from</span>
|
||||
<span class="sd"> `CountVectorizer <https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html>`_</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span> <span class="o">=</span> <span class="n">CountVectorizer</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># a valid index is assigned after fit</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span> <span class="c1"># a valid index is assigned after fit</span>
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer.fit">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the transformer, i.e., decides on the vocabulary, given a list of strings.</span>
|
||||
|
||||
<span class="sd"> :param X: a list of strings</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">build_analyzer</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vect</span><span class="o">.</span><span class="n">vocabulary_</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'UNK_TOKEN'</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'UNK_INDEX'</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">pad</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_TOKEN'</span><span class="p">],</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'PAD_INDEX'</span><span class="p">])</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer.transform">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.transform">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Transforms the strings in `X` as lists of numerical ids</span>
|
||||
|
||||
<span class="sd"> :param X: a list of strings</span>
|
||||
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
|
||||
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="c1"># given the number of tasks and the number of jobs, generates the slices for the parallel processes</span>
|
||||
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span> <span class="o">!=</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="s1">'transform called before fit'</span>
|
||||
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_index</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_index</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">documents</span><span class="p">):</span>
|
||||
<span class="n">vocab</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="p">[[</span><span class="n">vocab</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">unk</span><span class="p">)</span> <span class="k">for</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">analyzer</span><span class="p">(</span><span class="n">doc</span><span class="p">)]</span> <span class="k">for</span> <span class="n">doc</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">documents</span><span class="p">,</span> <span class="s1">'indexing'</span><span class="p">)]</span>
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer.fit_transform">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.fit_transform">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">fit_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Fits the transform on `X` and transforms it.</span>
|
||||
|
||||
<span class="sd"> :param X: a list of strings</span>
|
||||
<span class="sd"> :param n_jobs: the number of parallel workers to carry out this task</span>
|
||||
<span class="sd"> :return: a `np.ndarray` of numerical ids</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer.vocabulary_size">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.vocabulary_size">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">vocabulary_size</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Gets the length of the vocabulary according to which the document tokens have been indexed</span>
|
||||
|
||||
<span class="sd"> :return: integer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IndexTransformer.add_word">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.preprocessing.IndexTransformer.add_word">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">add_word</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">,</span> <span class="nb">id</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nogaps</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Adds a new token (regardless of whether it has been found in the text or not), with dedicated id.</span>
|
||||
<span class="sd"> Useful to define special tokens for codifying unknown words, or padding tokens.</span>
|
||||
|
||||
<span class="sd"> :param word: string, surface form of the token</span>
|
||||
<span class="sd"> :param id: integer, numerical value to assign to the token (leave as None for indicating the next valid id,</span>
|
||||
<span class="sd"> default)</span>
|
||||
<span class="sd"> :param nogaps: if set to True (default) asserts that the id indicated leads to no numerical gaps with</span>
|
||||
<span class="sd"> preceding ids stored so far</span>
|
||||
<span class="sd"> :return: integer, the numerical id for the new token</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">word</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> already in dictionary'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">id</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="c1"># add the word with the next id</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">id2word</span> <span class="o">=</span> <span class="p">{</span><span class="n">id_</span><span class="p">:</span><span class="n">word_</span> <span class="k">for</span> <span class="n">word_</span><span class="p">,</span> <span class="n">id_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
<span class="k">if</span> <span class="nb">id</span> <span class="ow">in</span> <span class="n">id2word</span><span class="p">:</span>
|
||||
<span class="n">old_word</span> <span class="o">=</span> <span class="n">id2word</span><span class="p">[</span><span class="nb">id</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span> <span class="o">=</span> <span class="nb">id</span>
|
||||
<span class="k">del</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">old_word</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">add_word</span><span class="p">(</span><span class="n">old_word</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">nogaps</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">id</span> <span class="o">></span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="o">+</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'word </span><span class="si">{</span><span class="n">word</span><span class="si">}</span><span class="s1"> added with id </span><span class="si">{</span><span class="nb">id</span><span class="si">}</span><span class="s1">, while the current vocabulary size '</span>
|
||||
<span class="sa">f</span><span class="s1">'is of </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">()</span><span class="si">}</span><span class="s1">, and id gaps are not allowed'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">vocabulary_</span><span class="p">[</span><span class="n">word</span><span class="p">]</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,244 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.data.reader — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.data.reader</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.data.reader</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">dok_matrix</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="from_text">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_text">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">from_text</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">class2int</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Reads a labelled collection of documents.</span>
|
||||
<span class="sd"> File format <0 or 1>\t<document>\n</span>
|
||||
|
||||
<span class="sd"> :param path: path to the labelled collection</span>
|
||||
<span class="sd"> :param encoding: the text encoding used to open the file</span>
|
||||
<span class="sd"> :param verbose: if >0 (default) shows some progress information in standard output</span>
|
||||
<span class="sd"> :return: a list of sentences, and a list of labels</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="o">></span><span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">file</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">'loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">file</span><span class="p">:</span>
|
||||
<span class="n">line</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">line</span><span class="p">:</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">label</span><span class="p">,</span> <span class="n">sentence</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">'</span><span class="se">\t</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="n">sentence</span> <span class="o">=</span> <span class="n">sentence</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">class2int</span><span class="p">:</span>
|
||||
<span class="n">label</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">sentence</span><span class="p">:</span>
|
||||
<span class="n">all_sentences</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sentence</span><span class="p">)</span>
|
||||
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'format error in </span><span class="si">{</span><span class="n">line</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">all_sentences</span><span class="p">,</span> <span class="n">all_labels</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="from_sparse">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_sparse">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">from_sparse</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Reads a labelled collection of real-valued instances expressed in sparse format</span>
|
||||
<span class="sd"> File format <-1 or 0 or 1>[\s col(int):val(float)]\n</span>
|
||||
|
||||
<span class="sd"> :param path: path to the labelled collection</span>
|
||||
<span class="sd"> :return: a `csr_matrix` containing the instances (rows), and a ndarray containing the labels</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">):</span>
|
||||
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">col_val</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">':'</span><span class="p">)</span>
|
||||
<span class="n">col</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">col</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">val</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">col</span><span class="p">,</span> <span class="n">val</span>
|
||||
|
||||
<span class="n">all_documents</span><span class="p">,</span> <span class="n">all_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="n">max_col</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="sa">f</span><span class="s1">'loading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
||||
<span class="n">parts</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">parts</span><span class="p">:</span>
|
||||
<span class="n">all_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]))</span>
|
||||
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">split_col_val</span><span class="p">(</span><span class="n">col_val</span><span class="p">)</span> <span class="k">for</span> <span class="n">col_val</span> <span class="ow">in</span> <span class="n">parts</span><span class="p">[</span><span class="mi">1</span><span class="p">:]])</span>
|
||||
<span class="n">cols</span><span class="p">,</span> <span class="n">vals</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">cols</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">vals</span><span class="p">)</span>
|
||||
<span class="n">max_col</span> <span class="o">=</span> <span class="nb">max</span><span class="p">(</span><span class="n">max_col</span><span class="p">,</span> <span class="n">cols</span><span class="o">.</span><span class="n">max</span><span class="p">())</span>
|
||||
<span class="n">all_documents</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">))</span>
|
||||
<span class="n">n_docs</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">dok_matrix</span><span class="p">((</span><span class="n">n_docs</span><span class="p">,</span> <span class="n">max_col</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">cols</span><span class="p">,</span> <span class="n">vals</span><span class="p">)</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">enumerate</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span> <span class="n">total</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">all_documents</span><span class="p">),</span>
|
||||
<span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">'\-- filling matrix of shape </span><span class="si">{</span><span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
||||
<span class="n">X</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">cols</span><span class="p">]</span> <span class="o">=</span> <span class="n">vals</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">tocsr</span><span class="p">()</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">all_labels</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span>
|
||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="from_csv">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.from_csv">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">from_csv</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s1">'utf-8'</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Reads a csv file in which columns are separated by ','.</span>
|
||||
<span class="sd"> File format <label>,<feat1>,<feat2>,...,<featn>\n</span>
|
||||
|
||||
<span class="sd"> :param path: path to the csv file</span>
|
||||
<span class="sd"> :param encoding: the text encoding used to open the file</span>
|
||||
<span class="sd"> :return: a ndarray (float) for the covariates and a np.ndarray for the labels</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">instance</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'rt'</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="n">encoding</span><span class="p">)</span><span class="o">.</span><span class="n">readlines</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="sa">f</span><span class="s1">'reading </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s1">'</span><span class="p">):</span>
|
||||
<span class="n">yi</span><span class="p">,</span> <span class="o">*</span><span class="n">xi</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">','</span><span class="p">)</span>
|
||||
<span class="n">X</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">float</span><span class="p">,</span><span class="n">xi</span><span class="p">)))</span>
|
||||
<span class="n">y</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="reindex_labels">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.reindex_labels">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">reindex_labels</span><span class="p">(</span><span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Re-indexes a list of labels as a list of indexes, and returns the classnames corresponding to the indexes.</span>
|
||||
<span class="sd"> E.g.:</span>
|
||||
|
||||
<span class="sd"> >>> reindex_labels(['B', 'B', 'A', 'C'])</span>
|
||||
<span class="sd"> >>> (array([1, 1, 0, 2]), array(['A', 'B', 'C'], dtype='<U1'))</span>
|
||||
|
||||
<span class="sd"> :param y: the list or array of original labels</span>
|
||||
<span class="sd"> :return: a ndarray (int) of class indexes, and a ndarray of classnames corresponding to the indexes.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">classnames</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span>
|
||||
<span class="n">label2index</span> <span class="o">=</span> <span class="p">{</span><span class="n">label</span><span class="p">:</span> <span class="n">index</span> <span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">label</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">classnames</span><span class="p">)}</span>
|
||||
<span class="n">indexed</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">classnames</span><span class="p">:</span>
|
||||
<span class="n">indexed</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">label</span><span class="p">]</span> <span class="o">=</span> <span class="n">label2index</span><span class="p">[</span><span class="n">label</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">indexed</span><span class="p">,</span> <span class="n">classnames</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="binarize">
|
||||
<a class="viewcode-back" href="../../../quapy.data.html#quapy.data.reader.binarize">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">binarize</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">pos_class</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Binarizes a categorical array-like collection of labels towards the positive class `pos_class`. E.g.,:</span>
|
||||
|
||||
<span class="sd"> >>> binarize([1, 2, 3, 1, 1, 0], pos_class=2)</span>
|
||||
<span class="sd"> >>> array([0, 1, 0, 0, 0, 0])</span>
|
||||
|
||||
<span class="sd"> :param y: array-like of labels</span>
|
||||
<span class="sd"> :param pos_class: integer, the positive class</span>
|
||||
<span class="sd"> :return: a binary np.ndarray, in which the value 1 corresponds to positions in which `y` had `pos_class` labels, and</span>
|
||||
<span class="sd"> 0 otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">ybin</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">y</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)</span>
|
||||
<span class="n">ybin</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">pos_class</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="k">return</span> <span class="n">ybin</span></div>
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,433 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.error — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.error</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.error</h1><div class="highlight"><pre>
|
||||
<span></span><span class="sd">"""Implementation of error measures used for quantification"""</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="from_name"><a class="viewcode-back" href="../../quapy.html#quapy.error.from_name">[docs]</a><span class="k">def</span> <span class="nf">from_name</span><span class="p">(</span><span class="n">err_name</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Gets an error function from its name. E.g., `from_name("mae")`</span>
|
||||
<span class="sd"> will return function :meth:`quapy.error.mae`</span>
|
||||
|
||||
<span class="sd"> :param err_name: string, the error name</span>
|
||||
<span class="sd"> :return: a callable implementing the requested error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">err_name</span> <span class="ow">in</span> <span class="n">ERROR_NAMES</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'unknown error </span><span class="si">{</span><span class="n">err_name</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="n">callable_error</span> <span class="o">=</span> <span class="nb">globals</span><span class="p">()[</span><span class="n">err_name</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">callable_error</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="f1e"><a class="viewcode-back" href="../../quapy.html#quapy.error.f1e">[docs]</a><span class="k">def</span> <span class="nf">f1e</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""F1 error: simply computes the error in terms of macro :math:`F_1`, i.e.,</span>
|
||||
<span class="sd"> :math:`1-F_1^M`, where :math:`F_1` is the harmonic mean of precision and recall,</span>
|
||||
<span class="sd"> defined as :math:`\\frac{2tp}{2tp+fp+fn}`, with `tp`, `fp`, and `fn` standing</span>
|
||||
<span class="sd"> for true positives, false positives, and false negatives, respectively.</span>
|
||||
<span class="sd"> `Macro` averaging means the :math:`F_1` is computed for each category independently,</span>
|
||||
<span class="sd"> and then averaged.</span>
|
||||
|
||||
<span class="sd"> :param y_true: array-like of true labels</span>
|
||||
<span class="sd"> :param y_pred: array-like of predicted labels</span>
|
||||
<span class="sd"> :return: :math:`1-F_1^M`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="n">f1_score</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">,</span> <span class="n">average</span><span class="o">=</span><span class="s1">'macro'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="acce"><a class="viewcode-back" href="../../quapy.html#quapy.error.acce">[docs]</a><span class="k">def</span> <span class="nf">acce</span><span class="p">(</span><span class="n">y_true</span><span class="p">,</span> <span class="n">y_pred</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the error in terms of 1-accuracy. The accuracy is computed as</span>
|
||||
<span class="sd"> :math:`\\frac{tp+tn}{tp+fp+fn+tn}`, with `tp`, `fp`, `fn`, and `tn` standing</span>
|
||||
<span class="sd"> for true positives, false positives, false negatives, and true negatives,</span>
|
||||
<span class="sd"> respectively</span>
|
||||
|
||||
<span class="sd"> :param y_true: array-like of true labels</span>
|
||||
<span class="sd"> :param y_pred: array-like of predicted labels</span>
|
||||
<span class="sd"> :return: 1-accuracy</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="mf">1.</span> <span class="o">-</span> <span class="p">(</span><span class="n">y_true</span> <span class="o">==</span> <span class="n">y_pred</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mae">[docs]</a><span class="k">def</span> <span class="nf">mae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean absolute error (see :meth:`quapy.error.ae`) across the sample pairs.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :return: mean absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ae"><a class="viewcode-back" href="../../quapy.html#quapy.error.ae">[docs]</a><span class="k">def</span> <span class="nf">ae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the absolute error between the two prevalence vectors.</span>
|
||||
<span class="sd"> Absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
||||
<span class="sd"> :math:`AE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}|\\hat{p}(y)-p(y)|`,</span>
|
||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :return: absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="nae"><a class="viewcode-back" href="../../quapy.html#quapy.error.nae">[docs]</a><span class="k">def</span> <span class="nf">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the normalized absolute error between the two prevalence vectors.</span>
|
||||
<span class="sd"> Normalized absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
||||
<span class="sd"> :math:`NAE(p,\\hat{p})=\\frac{AE(p,\\hat{p})}{z_{AE}}`,</span>
|
||||
<span class="sd"> where :math:`z_{AE}=\\frac{2(1-\\min_{y\\in \\mathcal{Y}} p(y))}{|\\mathcal{Y}|}`, and :math:`\\mathcal{Y}`</span>
|
||||
<span class="sd"> are the classes of interest.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :return: normalized absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span> <span class="o">==</span> <span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape </span><span class="si">{</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1"> vs. </span><span class="si">{</span><span class="n">prevs_hat</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mnae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnae">[docs]</a><span class="k">def</span> <span class="nf">mnae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean normalized absolute error (see :meth:`quapy.error.nae`) across the sample pairs.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :return: mean normalized absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">nae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mse"><a class="viewcode-back" href="../../quapy.html#quapy.error.mse">[docs]</a><span class="k">def</span> <span class="nf">mse</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean squared error (see :meth:`quapy.error.se`) across the sample pairs.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the</span>
|
||||
<span class="sd"> true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the</span>
|
||||
<span class="sd"> predicted prevalence values</span>
|
||||
<span class="sd"> :return: mean squared error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="se"><a class="viewcode-back" href="../../quapy.html#quapy.error.se">[docs]</a><span class="k">def</span> <span class="nf">se</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the squared error between the two prevalence vectors.</span>
|
||||
<span class="sd"> Squared error between two prevalence vectors :math:`p` and :math:`\\hat{p}` is computed as</span>
|
||||
<span class="sd"> :math:`SE(p,\\hat{p})=\\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}(\\hat{p}(y)-p(y))^2`,</span>
|
||||
<span class="sd"> where</span>
|
||||
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :return: squared error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="p">((</span><span class="n">prevs_hat</span> <span class="o">-</span> <span class="n">prevs</span><span class="p">)</span> <span class="o">**</span> <span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.mkld">[docs]</a><span class="k">def</span> <span class="nf">mkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean Kullback-Leibler divergence (see :meth:`quapy.error.kld`) across the</span>
|
||||
<span class="sd"> sample pairs. The distributions are smoothed using the `eps` factor</span>
|
||||
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
|
||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: mean Kullback-Leibler divergence</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="kld"><a class="viewcode-back" href="../../quapy.html#quapy.error.kld">[docs]</a><span class="k">def</span> <span class="nf">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the Kullback-Leibler divergence between the two prevalence distributions.</span>
|
||||
<span class="sd"> Kullback-Leibler divergence between two prevalence distributions :math:`p` and :math:`\\hat{p}`</span>
|
||||
<span class="sd"> is computed as</span>
|
||||
<span class="sd"> :math:`KLD(p,\\hat{p})=D_{KL}(p||\\hat{p})=</span>
|
||||
<span class="sd"> \\sum_{y\\in \\mathcal{Y}} p(y)\\log\\frac{p(y)}{\\hat{p}(y)}`,</span>
|
||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. KLD is not defined in cases in which the distributions contain</span>
|
||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: Kullback-Leibler divergence between the two distributions</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">smooth_prevs</span> <span class="o">=</span> <span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span>
|
||||
<span class="n">smooth_prevs_hat</span> <span class="o">=</span> <span class="n">prevs_hat</span> <span class="o">+</span> <span class="n">eps</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">smooth_prevs</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">smooth_prevs</span><span class="o">/</span><span class="n">smooth_prevs_hat</span><span class="p">))</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mnkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnkld">[docs]</a><span class="k">def</span> <span class="nf">mnkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean Normalized Kullback-Leibler divergence (see :meth:`quapy.error.nkld`)</span>
|
||||
<span class="sd"> across the sample pairs. The distributions are smoothed using the `eps` factor</span>
|
||||
<span class="sd"> (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions contain</span>
|
||||
<span class="sd"> zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample size.</span>
|
||||
<span class="sd"> If `eps=None`, the sample size will be taken from the environment variable `SAMPLE_SIZE`</span>
|
||||
<span class="sd"> (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: mean Normalized Kullback-Leibler divergence</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="nkld"><a class="viewcode-back" href="../../quapy.html#quapy.error.nkld">[docs]</a><span class="k">def</span> <span class="nf">nkld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the Normalized Kullback-Leibler divergence between the two prevalence distributions.</span>
|
||||
<span class="sd"> Normalized Kullback-Leibler divergence between two prevalence distributions :math:`p` and</span>
|
||||
<span class="sd"> :math:`\\hat{p}` is computed as</span>
|
||||
<span class="sd"> :math:`NKLD(p,\\hat{p}) = 2\\frac{e^{KLD(p,\\hat{p})}}{e^{KLD(p,\\hat{p})}+1}-1`,</span>
|
||||
<span class="sd"> where</span>
|
||||
<span class="sd"> :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. NKLD is not defined in cases in which the distributions</span>
|
||||
<span class="sd"> contain zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the sample</span>
|
||||
<span class="sd"> size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: Normalized Kullback-Leibler divergence between the two distributions</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">ekld</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kld</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="mf">2.</span> <span class="o">*</span> <span class="n">ekld</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">ekld</span><span class="p">)</span> <span class="o">-</span> <span class="mf">1.</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mrae">[docs]</a><span class="k">def</span> <span class="nf">mrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean relative absolute error (see :meth:`quapy.error.rae`) across</span>
|
||||
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
|
||||
<span class="sd"> :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. `mrae` is not defined in cases in which the true</span>
|
||||
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
|
||||
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
|
||||
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: mean relative absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="rae"><a class="viewcode-back" href="../../quapy.html#quapy.error.rae">[docs]</a><span class="k">def</span> <span class="nf">rae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the relative absolute error between the two prevalence vectors.</span>
|
||||
<span class="sd"> Relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
|
||||
<span class="sd"> is computed as</span>
|
||||
<span class="sd"> :math:`RAE(p,\\hat{p})=</span>
|
||||
<span class="sd"> \\frac{1}{|\\mathcal{Y}|}\\sum_{y\\in \\mathcal{Y}}\\frac{|\\hat{p}(y)-p(y)|}{p(y)}`,</span>
|
||||
<span class="sd"> where :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. `rae` is not defined in cases in which the true distribution</span>
|
||||
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
|
||||
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: relative absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="nrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.nrae">[docs]</a><span class="k">def</span> <span class="nf">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the normalized relative absolute error between the two prevalence vectors.</span>
|
||||
<span class="sd"> Normalized relative absolute error between two prevalence vectors :math:`p` and :math:`\\hat{p}`</span>
|
||||
<span class="sd"> is computed as</span>
|
||||
<span class="sd"> :math:`NRAE(p,\\hat{p})= \\frac{RAE(p,\\hat{p})}{z_{RAE}}`,</span>
|
||||
<span class="sd"> where</span>
|
||||
<span class="sd"> :math:`z_{RAE} = \\frac{|\\mathcal{Y}|-1+\\frac{1-\\min_{y\\in \\mathcal{Y}} p(y)}{\\min_{y\\in \\mathcal{Y}} p(y)}}{|\\mathcal{Y}|}`</span>
|
||||
<span class="sd"> and :math:`\\mathcal{Y}` are the classes of interest.</span>
|
||||
<span class="sd"> The distributions are smoothed using the `eps` factor (see :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_classes,)` with the predicted prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. `nrae` is not defined in cases in which the true distribution</span>
|
||||
<span class="sd"> contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`, with :math:`T` the</span>
|
||||
<span class="sd"> sample size. If `eps=None`, the sample size will be taken from the environment variable</span>
|
||||
<span class="sd"> `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: normalized relative absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">eps</span> <span class="o">=</span> <span class="n">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">prevs_hat</span> <span class="o">=</span> <span class="n">smooth</span><span class="p">(</span><span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span>
|
||||
<span class="n">min_p</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="nb">abs</span><span class="p">(</span><span class="n">prevs</span> <span class="o">-</span> <span class="n">prevs_hat</span><span class="p">)</span> <span class="o">/</span> <span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">1</span><span class="o">+</span><span class="p">(</span><span class="mi">1</span><span class="o">-</span><span class="n">min_p</span><span class="p">)</span><span class="o">/</span><span class="n">min_p</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mnrae"><a class="viewcode-back" href="../../quapy.html#quapy.error.mnrae">[docs]</a><span class="k">def</span> <span class="nf">mnrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Computes the mean normalized relative absolute error (see :meth:`quapy.error.nrae`) across</span>
|
||||
<span class="sd"> the sample pairs. The distributions are smoothed using the `eps` factor (see</span>
|
||||
<span class="sd"> :meth:`quapy.error.smooth`).</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_samples, n_classes,)` with the true</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted</span>
|
||||
<span class="sd"> prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor. `mnrae` is not defined in cases in which the true</span>
|
||||
<span class="sd"> distribution contains zeros; `eps` is typically set to be :math:`\\frac{1}{2T}`,</span>
|
||||
<span class="sd"> with :math:`T` the sample size. If `eps=None`, the sample size will be taken from</span>
|
||||
<span class="sd"> the environment variable `SAMPLE_SIZE` (which has thus to be set beforehand).</span>
|
||||
<span class="sd"> :return: mean normalized relative absolute error</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">nrae</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">prevs_hat</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="smooth"><a class="viewcode-back" href="../../quapy.html#quapy.error.smooth">[docs]</a><span class="k">def</span> <span class="nf">smooth</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">eps</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">""" Smooths a prevalence distribution with :math:`\\epsilon` (`eps`) as:</span>
|
||||
<span class="sd"> :math:`\\underline{p}(y)=\\frac{\\epsilon+p(y)}{\\epsilon|\\mathcal{Y}|+</span>
|
||||
<span class="sd"> \\displaystyle\\sum_{y\\in \\mathcal{Y}}p(y)}`</span>
|
||||
|
||||
<span class="sd"> :param prevs: array-like of shape `(n_classes,)` with the true prevalence values</span>
|
||||
<span class="sd"> :param eps: smoothing factor</span>
|
||||
<span class="sd"> :return: array-like of shape `(n_classes,)` with the smoothed distribution</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevs</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">prevs</span> <span class="o">+</span> <span class="n">eps</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">eps</span> <span class="o">*</span> <span class="n">n_classes</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__check_eps</span><span class="p">(</span><span class="n">eps</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">eps</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'eps was not defined, and qp.environ["SAMPLE_SIZE"] was not set'</span><span class="p">)</span>
|
||||
<span class="n">eps</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">/</span> <span class="p">(</span><span class="mf">2.</span> <span class="o">*</span> <span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">eps</span>
|
||||
|
||||
|
||||
<span class="n">CLASSIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">f1e</span><span class="p">,</span> <span class="n">acce</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR</span> <span class="o">=</span> <span class="p">{</span><span class="n">mae</span><span class="p">,</span> <span class="n">mnae</span><span class="p">,</span> <span class="n">mrae</span><span class="p">,</span> <span class="n">mnrae</span><span class="p">,</span> <span class="n">mse</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR_SINGLE</span> <span class="o">=</span> <span class="p">{</span><span class="n">ae</span><span class="p">,</span> <span class="n">nae</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">se</span><span class="p">,</span> <span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR_SMOOTH</span> <span class="o">=</span> <span class="p">{</span><span class="n">kld</span><span class="p">,</span> <span class="n">nkld</span><span class="p">,</span> <span class="n">rae</span><span class="p">,</span> <span class="n">nrae</span><span class="p">,</span> <span class="n">mkld</span><span class="p">,</span> <span class="n">mnkld</span><span class="p">,</span> <span class="n">mrae</span><span class="p">}</span>
|
||||
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">CLASSIFICATION_ERROR</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">}</span>
|
||||
<span class="n">QUANTIFICATION_ERROR_SMOOTH_NAMES</span> <span class="o">=</span> <span class="p">{</span><span class="n">func</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">func</span> <span class="ow">in</span> <span class="n">QUANTIFICATION_ERROR_SMOOTH</span><span class="p">}</span>
|
||||
<span class="n">ERROR_NAMES</span> <span class="o">=</span> \
|
||||
<span class="n">CLASSIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span> <span class="o">|</span> <span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
|
||||
|
||||
<span class="n">f1_error</span> <span class="o">=</span> <span class="n">f1e</span>
|
||||
<span class="n">acc_error</span> <span class="o">=</span> <span class="n">acce</span>
|
||||
<span class="n">mean_absolute_error</span> <span class="o">=</span> <span class="n">mae</span>
|
||||
<span class="n">absolute_error</span> <span class="o">=</span> <span class="n">ae</span>
|
||||
<span class="n">mean_relative_absolute_error</span> <span class="o">=</span> <span class="n">mrae</span>
|
||||
<span class="n">relative_absolute_error</span> <span class="o">=</span> <span class="n">rae</span>
|
||||
<span class="n">normalized_absolute_error</span> <span class="o">=</span> <span class="n">nae</span>
|
||||
<span class="n">normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">nrae</span>
|
||||
<span class="n">mean_normalized_absolute_error</span> <span class="o">=</span> <span class="n">mnae</span>
|
||||
<span class="n">mean_normalized_relative_absolute_error</span> <span class="o">=</span> <span class="n">mnrae</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,291 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.evaluation</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.evaluation</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Iterable</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">,</span> <span class="n">IterateProtocol</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
|
||||
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="prediction"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.prediction">[docs]</a><span class="k">def</span> <span class="nf">prediction</span><span class="p">(</span>
|
||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Uses a quantification model to generate predictions for the samples generated via a specific protocol.</span>
|
||||
<span class="sd"> This function is central to all evaluation processes, and is endowed with an optimization to speed-up the</span>
|
||||
<span class="sd"> prediction of protocols that generate samples from a large collection. The optimization applies to aggregative</span>
|
||||
<span class="sd"> quantifiers only, and to OnLabelledCollectionProtocol protocols, and comes down to generating the classification</span>
|
||||
<span class="sd"> predictions once and for all, and then generating samples over the classification predictions (instead of over</span>
|
||||
<span class="sd"> the raw instances), so that the classifier prediction is never called again. This behaviour is obtained by</span>
|
||||
<span class="sd"> setting `aggr_speedup` to 'auto' or True, and is only carried out if the overall process is convenient in terms</span>
|
||||
<span class="sd"> of computations (e.g., if the number of classification predictions needed for the original collection exceeds the</span>
|
||||
<span class="sd"> number of classification predictions needed for all samples, then the optimization is not undertaken).</span>
|
||||
|
||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
|
||||
<span class="sd"> in charge of generating the samples for which the model has to issue class prevalence predictions.</span>
|
||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
||||
<span class="sd"> :return: a tuple `(true_prevs, estim_prevs)` in which each element in the tuple is an array of shape</span>
|
||||
<span class="sd"> `(n_samples, n_classes)` containing the true, or predicted, prevalence values for each sample</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'auto'</span><span class="p">,</span> <span class="s1">'force'</span><span class="p">],</span> <span class="s1">'invalid value for aggr_speedup'</span>
|
||||
|
||||
<span class="n">sout</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">print</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="kc">None</span>
|
||||
|
||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="ow">in</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="s1">'auto'</span><span class="p">,</span> <span class="s1">'force'</span><span class="p">]:</span>
|
||||
<span class="c1"># checks whether the prediction can be made more efficiently; this check consists in verifying if the model is</span>
|
||||
<span class="c1"># of type aggregative, if the protocol is based on LabelledCollection, and if the total number of documents to</span>
|
||||
<span class="c1"># classify using the protocol would exceed the number of test documents in the original collection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeQuantifier</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">aggr_speedup</span> <span class="o">==</span> <span class="s1">'force'</span><span class="p">:</span>
|
||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'forcing aggregative speedup'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="s1">'sample_size'</span><span class="p">):</span>
|
||||
<span class="n">nD</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
|
||||
<span class="n">samplesD</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">()</span> <span class="o">*</span> <span class="n">protocol</span><span class="o">.</span><span class="n">sample_size</span>
|
||||
<span class="k">if</span> <span class="n">nD</span> <span class="o"><</span> <span class="n">samplesD</span><span class="p">:</span>
|
||||
<span class="n">apply_optimization</span> <span class="o">=</span> <span class="kc">True</span>
|
||||
<span class="n">sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'speeding up the prediction for the aggregative quantifier, '</span>
|
||||
<span class="sa">f</span><span class="s1">'total classifications </span><span class="si">{</span><span class="n">nD</span><span class="si">}</span><span class="s1"> instead of </span><span class="si">{</span><span class="n">samplesD</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">apply_optimization</span><span class="p">:</span>
|
||||
<span class="n">pre_classified</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classify</span><span class="p">(</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">()</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">protocol_with_predictions</span> <span class="o">=</span> <span class="n">protocol</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classified</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">aggregate</span><span class="p">,</span> <span class="n">protocol_with_predictions</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">__prediction_helper</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">verbose</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__prediction_helper</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">,</span> <span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">sample_instances</span><span class="p">,</span> <span class="n">sample_prev</span> <span class="ow">in</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">protocol</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">protocol</span><span class="o">.</span><span class="n">total</span><span class="p">(),</span> <span class="n">desc</span><span class="o">=</span><span class="s1">'predicting'</span><span class="p">)</span> <span class="k">if</span> <span class="n">verbose</span> <span class="k">else</span> <span class="n">protocol</span><span class="p">():</span>
|
||||
<span class="n">estim_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">quantification_fn</span><span class="p">(</span><span class="n">sample_instances</span><span class="p">))</span>
|
||||
<span class="n">true_prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sample_prev</span><span class="p">)</span>
|
||||
|
||||
<span class="n">true_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">)</span>
|
||||
<span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="evaluation_report"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluation_report">[docs]</a><span class="k">def</span> <span class="nf">evaluation_report</span><span class="p">(</span><span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
||||
<span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span><span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">'mae'</span><span class="p">,</span>
|
||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates a report (a pandas' DataFrame) containing information on the evaluation of the model according</span>
|
||||
<span class="sd"> to a specific protocol and in terms of one or more evaluation metrics (errors).</span>
|
||||
|
||||
|
||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the protocol</span>
|
||||
<span class="sd"> in charge of generating the samples in which the model is evaluated.</span>
|
||||
<span class="sd"> :param error_metrics: a string, or list of strings, representing the name(s) of an error function in `qp.error`</span>
|
||||
<span class="sd"> (e.g., 'mae', the default value), or a callable function, or a list of callable functions, implementing</span>
|
||||
<span class="sd"> the error function itself.</span>
|
||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
||||
<span class="sd"> :return: a pandas' DataFrame containing the columns 'true-prev' (the true prevalence of each sample),</span>
|
||||
<span class="sd"> 'estim-prev' (the prevalence estimated by the model for each sample), and as many columns as error metrics</span>
|
||||
<span class="sd"> have been indicated, each displaying the score in terms of that metric for every sample.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_prevalence_report</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">error_metrics</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]]</span> <span class="o">=</span> <span class="s1">'mae'</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metrics</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="n">error_metrics</span> <span class="o">=</span> <span class="p">[</span><span class="n">error_metrics</span><span class="p">]</span>
|
||||
|
||||
<span class="n">error_funcs</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="k">else</span> <span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_metrics</span><span class="p">]</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">hasattr</span><span class="p">(</span><span class="n">e</span><span class="p">,</span> <span class="s1">'__call__'</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">),</span> <span class="s1">'invalid error functions'</span>
|
||||
<span class="n">error_names</span> <span class="o">=</span> <span class="p">[</span><span class="n">e</span><span class="o">.</span><span class="vm">__name__</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">error_funcs</span><span class="p">]</span>
|
||||
|
||||
<span class="n">row_entries</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
||||
<span class="n">series</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'true-prev'</span><span class="p">:</span> <span class="n">true_prev</span><span class="p">,</span> <span class="s1">'estim-prev'</span><span class="p">:</span> <span class="n">estim_prev</span><span class="p">}</span>
|
||||
<span class="k">for</span> <span class="n">error_name</span><span class="p">,</span> <span class="n">error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">error_names</span><span class="p">,</span> <span class="n">error_funcs</span><span class="p">):</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">)</span>
|
||||
<span class="n">series</span><span class="p">[</span><span class="n">error_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">score</span>
|
||||
<span class="n">row_entries</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">series</span><span class="p">)</span>
|
||||
|
||||
<span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">row_entries</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">df</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="evaluate"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate">[docs]</a><span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span>
|
||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
||||
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
|
||||
<span class="n">aggr_speedup</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'auto'</span><span class="p">,</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Evaluates a quantification model according to a specific sample generation protocol and in terms of one</span>
|
||||
<span class="sd"> evaluation metric (error).</span>
|
||||
|
||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
||||
<span class="sd"> :param protocol: :class:`quapy.protocol.AbstractProtocol`; if this object is also instance of</span>
|
||||
<span class="sd"> :class:`quapy.protocol.OnLabelledCollectionProtocol`, then the aggregation speed-up can be run. This is the</span>
|
||||
<span class="sd"> protocol in charge of generating the samples in which the model is evaluated.</span>
|
||||
<span class="sd"> :param error_metric: a string representing the name of an error function in `qp.error`</span>
|
||||
<span class="sd"> (e.g., 'mae'), or a callable function implementing the error function itself.</span>
|
||||
<span class="sd"> :param aggr_speedup: whether or not to apply the speed-up. Set to "force" for applying it even if the number of</span>
|
||||
<span class="sd"> instances in the original collection on which the protocol acts is larger than the number of instances</span>
|
||||
<span class="sd"> in the samples to be generated. Set to True or "auto" (default) for letting QuaPy decide whether it is</span>
|
||||
<span class="sd"> convenient or not. Set to False to deactivate.</span>
|
||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
||||
<span class="sd"> :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with</span>
|
||||
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns</span>
|
||||
<span class="sd"> a single float</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error_metric</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="n">error_metric</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_metric</span><span class="p">)</span>
|
||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">prediction</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="n">aggr_speedup</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">error_metric</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="evaluate_on_samples"><a class="viewcode-back" href="../../quapy.html#quapy.evaluation.evaluate_on_samples">[docs]</a><span class="k">def</span> <span class="nf">evaluate_on_samples</span><span class="p">(</span>
|
||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">samples</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">],</span>
|
||||
<span class="n">error_metric</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">],</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Evaluates a quantification model on a given set of samples and in terms of one evaluation metric (error).</span>
|
||||
|
||||
<span class="sd"> :param model: a quantifier, instance of :class:`quapy.method.base.BaseQuantifier`</span>
|
||||
<span class="sd"> :param samples: a list of samples on which the quantifier is to be evaluated</span>
|
||||
<span class="sd"> :param error_metric: a string representing the name of an error function in `qp.error`</span>
|
||||
<span class="sd"> (e.g., 'mae'), or a callable function implementing the error function itself.</span>
|
||||
<span class="sd"> :param verbose: boolean, show or not information in stdout</span>
|
||||
<span class="sd"> :return: if the error metric is not averaged (e.g., 'ae', 'rae'), returns an array of shape `(n_samples,)` with</span>
|
||||
<span class="sd"> the error scores for each sample; if the error metric is averaged (e.g., 'mae', 'mrae') then returns</span>
|
||||
<span class="sd"> a single float</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">IterateProtocol</span><span class="p">(</span><span class="n">samples</span><span class="p">),</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="n">verbose</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,468 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.functional — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.functional</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.functional</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">scipy</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="prevalence_linspace"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_linspace">[docs]</a><span class="k">def</span> <span class="nf">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.01</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Produces an array of uniformly separated values of prevalence.</span>
|
||||
<span class="sd"> By default, produces an array of 21 prevalence values, with</span>
|
||||
<span class="sd"> step 0.05 and with the limits smoothed, i.e.:</span>
|
||||
<span class="sd"> [0.01, 0.05, 0.10, 0.15, ..., 0.90, 0.95, 0.99]</span>
|
||||
|
||||
<span class="sd"> :param n_prevalences: the number of prevalence values to sample from the [0,1] interval (default 21)</span>
|
||||
<span class="sd"> :param repeats: number of times each prevalence is to be repeated (defaults to 1)</span>
|
||||
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
|
||||
<span class="sd"> :return: an array of uniformly separated prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">,</span> <span class="n">num</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">endpoint</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">+=</span> <span class="n">smooth_limits_epsilon</span>
|
||||
<span class="n">p</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">-=</span> <span class="n">smooth_limits_epsilon</span>
|
||||
<span class="k">if</span> <span class="n">p</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">></span> <span class="n">p</span><span class="p">[</span><span class="mi">1</span><span class="p">]:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the smoothing in the limits is greater than the prevalence step'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">repeats</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">repeats</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">p</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="prevalence_from_labels"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_labels">[docs]</a><span class="k">def</span> <span class="nf">prevalence_from_labels</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">classes</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Computes the prevalence values from a vector of labels.</span>
|
||||
|
||||
<span class="sd"> :param labels: array-like of shape `(n_instances)` with the label for each instance</span>
|
||||
<span class="sd"> :param classes: the class labels. This is needed in order to correctly compute the prevalence vector even when</span>
|
||||
<span class="sd"> some classes have no examples.</span>
|
||||
<span class="sd"> :return: an ndarray of shape `(len(classes))` with the class prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">labels</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param labels does not seem to be a ndarray of label predictions'</span><span class="p">)</span>
|
||||
<span class="n">unique</span><span class="p">,</span> <span class="n">counts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">return_counts</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">by_class</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span><span class="mi">0</span><span class="p">,</span> <span class="nb">dict</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">unique</span><span class="p">,</span> <span class="n">counts</span><span class="p">)))</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">by_class</span><span class="p">[</span><span class="n">class_</span><span class="p">]</span> <span class="k">for</span> <span class="n">class_</span> <span class="ow">in</span> <span class="n">classes</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="prevalence_from_probabilities"><a class="viewcode-back" href="../../quapy.html#quapy.functional.prevalence_from_probabilities">[docs]</a><span class="k">def</span> <span class="nf">prevalence_from_probabilities</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">binarize</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a vector of prevalence values from a matrix of posterior probabilities.</span>
|
||||
|
||||
<span class="sd"> :param posteriors: array-like of shape `(n_instances, n_classes,)` with posterior probabilities for each class</span>
|
||||
<span class="sd"> :param binarize: set to True (default is False) for computing the prevalence values on crisp decisions (i.e.,</span>
|
||||
<span class="sd"> converting the vectors of posterior probabilities into class indices, by taking the argmax).</span>
|
||||
<span class="sd"> :return: array of shape `(n_classes,)` containing the prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">ndim</span> <span class="o">!=</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param posteriors does not seem to be a ndarray of posteior probabilities'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">binarize</span><span class="p">:</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">prevalence_from_labels</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">posteriors</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">posteriors</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">prevalences</span> <span class="o">/=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="as_binary_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.as_binary_prevalence">[docs]</a><span class="k">def</span> <span class="nf">as_binary_prevalence</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">],</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two</span>
|
||||
<span class="sd"> values representing a binary distribution.</span>
|
||||
|
||||
<span class="sd"> :param positive_prevalence: prevalence for the positive class</span>
|
||||
<span class="sd"> :param clip_if_necessary: if True, clips the value in [0,1] in order to guarantee the resulting distribution</span>
|
||||
<span class="sd"> is valid. If False, it then checks that the value is in the valid range, and raises an error if not.</span>
|
||||
<span class="sd"> :return: np.ndarray of shape `(2,)`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">clip_if_necessary</span><span class="p">:</span>
|
||||
<span class="n">positive_prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><=</span> <span class="n">positive_prevalence</span> <span class="o"><=</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'the value provided is not a valid prevalence for the positive class'</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">positive_prevalence</span><span class="p">,</span> <span class="n">positive_prevalence</span><span class="p">])</span><span class="o">.</span><span class="n">T</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="HellingerDistance"><a class="viewcode-back" href="../../quapy.html#quapy.functional.HellingerDistance">[docs]</a><span class="k">def</span> <span class="nf">HellingerDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Computes the Hellinger Distance (HD) between (discretized) distributions `P` and `Q`.</span>
|
||||
<span class="sd"> The HD for two discrete distributions of `k` bins is defined as:</span>
|
||||
|
||||
<span class="sd"> .. math::</span>
|
||||
<span class="sd"> HD(P,Q) = \\frac{ 1 }{ \\sqrt{ 2 } } \\sqrt{ \\sum_{i=1}^k ( \\sqrt{p_i} - \\sqrt{q_i} )^2 }</span>
|
||||
|
||||
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
||||
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
||||
<span class="sd"> :return: float</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">P</span><span class="p">)</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">Q</span><span class="p">))</span><span class="o">**</span><span class="mi">2</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TopsoeDistance"><a class="viewcode-back" href="../../quapy.html#quapy.functional.TopsoeDistance">[docs]</a><span class="k">def</span> <span class="nf">TopsoeDistance</span><span class="p">(</span><span class="n">P</span><span class="p">,</span> <span class="n">Q</span><span class="p">,</span> <span class="n">epsilon</span><span class="o">=</span><span class="mf">1e-20</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Topsoe distance between two (discretized) distributions `P` and `Q`.</span>
|
||||
<span class="sd"> The Topsoe distance for two discrete distributions of `k` bins is defined as:</span>
|
||||
|
||||
<span class="sd"> .. math::</span>
|
||||
<span class="sd"> Topsoe(P,Q) = \\sum_{i=1}^k \\left( p_i \\log\\left(\\frac{ 2 p_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) +</span>
|
||||
<span class="sd"> q_i \\log\\left(\\frac{ 2 q_i + \\epsilon }{ p_i+q_i+\\epsilon }\\right) \\right)</span>
|
||||
|
||||
<span class="sd"> :param P: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
||||
<span class="sd"> :param Q: real-valued array-like of shape `(k,)` representing a discrete distribution</span>
|
||||
<span class="sd"> :return: float</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">P</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">P</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">))</span> <span class="o">+</span> <span class="n">Q</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="mi">2</span><span class="o">*</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)</span><span class="o">/</span><span class="p">(</span><span class="n">P</span><span class="o">+</span><span class="n">Q</span><span class="o">+</span><span class="n">epsilon</span><span class="p">)))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="uniform_prevalence_sampling"><a class="viewcode-back" href="../../quapy.html#quapy.functional.uniform_prevalence_sampling">[docs]</a><span class="k">def</span> <span class="nf">uniform_prevalence_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements the `Kraemer algorithm <http://www.cs.cmu.edu/~nasmith/papers/smith+tromble.tr04.pdf>`_</span>
|
||||
<span class="sd"> for sampling uniformly at random from the unit simplex. This implementation is adapted from this</span>
|
||||
<span class="sd"> `post <https://cs.stackexchange.com/questions/3227/uniform-sampling-from-a-simplex>`_.</span>
|
||||
|
||||
<span class="sd"> :param n_classes: integer, number of classes (dimensionality of the simplex)</span>
|
||||
<span class="sd"> :param size: number of samples to return</span>
|
||||
<span class="sd"> :return: `np.ndarray` of shape `(size, n_classes,)` if `size>1`, or of shape `(n_classes,)` otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">n_classes</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">)</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="mi">1</span><span class="o">-</span><span class="n">u</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span><span class="o">.</span><span class="n">T</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">u</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">_0s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
|
||||
<span class="n">_1s</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">ones</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">size</span><span class="p">,</span> <span class="mi">1</span><span class="p">))</span>
|
||||
<span class="n">a</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">_0s</span><span class="p">,</span> <span class="n">u</span><span class="p">])</span>
|
||||
<span class="n">b</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">hstack</span><span class="p">([</span><span class="n">u</span><span class="p">,</span> <span class="n">_1s</span><span class="p">])</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">b</span><span class="o">-</span><span class="n">a</span>
|
||||
<span class="k">if</span> <span class="n">size</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">u</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">u</span></div>
|
||||
|
||||
|
||||
<span class="n">uniform_simplex_sampling</span> <span class="o">=</span> <span class="n">uniform_prevalence_sampling</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="strprev"><a class="viewcode-back" href="../../quapy.html#quapy.functional.strprev">[docs]</a><span class="k">def</span> <span class="nf">strprev</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a string representation for a prevalence vector. E.g.,</span>
|
||||
|
||||
<span class="sd"> >>> strprev([1/3, 2/3], prec=2)</span>
|
||||
<span class="sd"> '[0.33, 0.67]'</span>
|
||||
|
||||
<span class="sd"> :param prevalences: a vector of prevalence values</span>
|
||||
<span class="sd"> :param prec: float precision</span>
|
||||
<span class="sd"> :return: string</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="s1">'['</span><span class="o">+</span> <span class="s1">', '</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">p</span><span class="si">:</span><span class="s1">.</span><span class="si">{</span><span class="n">prec</span><span class="si">}</span><span class="s1">f</span><span class="si">}</span><span class="s1">'</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">prevalences</span><span class="p">])</span> <span class="o">+</span> <span class="s1">']'</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="adjusted_quantification"><a class="viewcode-back" href="../../quapy.html#quapy.functional.adjusted_quantification">[docs]</a><span class="k">def</span> <span class="nf">adjusted_quantification</span><span class="p">(</span><span class="n">prevalence_estim</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">clip</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements the adjustment of ACC and PACC for the binary case. The adjustment for a prevalence estimate of the</span>
|
||||
<span class="sd"> positive class `p` comes down to computing:</span>
|
||||
|
||||
<span class="sd"> .. math::</span>
|
||||
<span class="sd"> ACC(p) = \\frac{ p - fpr }{ tpr - fpr }</span>
|
||||
|
||||
<span class="sd"> :param prevalence_estim: float, the estimated value for the positive class</span>
|
||||
<span class="sd"> :param tpr: float, the true positive rate of the classifier</span>
|
||||
<span class="sd"> :param fpr: float, the false positive rate of the classifier</span>
|
||||
<span class="sd"> :param clip: set to True (default) to clip values that might exceed the range [0,1]</span>
|
||||
<span class="sd"> :return: float, the adjusted count</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">den</span> <span class="o">=</span> <span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span>
|
||||
<span class="k">if</span> <span class="n">den</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">den</span> <span class="o">+=</span> <span class="mf">1e-8</span>
|
||||
<span class="n">adjusted</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevalence_estim</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">/</span> <span class="n">den</span>
|
||||
<span class="k">if</span> <span class="n">clip</span><span class="p">:</span>
|
||||
<span class="n">adjusted</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">clip</span><span class="p">(</span><span class="n">adjusted</span><span class="p">,</span> <span class="mf">0.</span><span class="p">,</span> <span class="mf">1.</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">adjusted</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="normalize_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.normalize_prevalence">[docs]</a><span class="k">def</span> <span class="nf">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Normalize a vector or matrix of prevalence values. The normalization consists of applying an L1 normalization in</span>
|
||||
<span class="sd"> cases in which the prevalence values are not all-zeros, and converting the prevalence values into `1/n_classes` in</span>
|
||||
<span class="sd"> cases in which all values are zero.</span>
|
||||
|
||||
<span class="sd"> :param prevalences: array-like of shape `(n_classes,)` or of shape `(n_samples, n_classes,)` with prevalence values</span>
|
||||
<span class="sd"> :return: a normalized vector or matrix of prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span>
|
||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">accum</span> <span class="o">=</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">keepdims</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">true_divide</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">accum</span><span class="p">,</span> <span class="n">where</span><span class="o">=</span><span class="n">accum</span><span class="o">></span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">allzeros</span> <span class="o">=</span> <span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span>
|
||||
<span class="k">if</span> <span class="nb">any</span><span class="p">(</span><span class="n">allzeros</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">prevalences</span><span class="p">[</span><span class="n">accum</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span><span class="o">==</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mf">1.</span><span class="o">/</span><span class="n">n_classes</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">prevalences</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">__num_prevalence_combinations_depr</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Computes the number of prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally distant</span>
|
||||
<span class="sd"> prevalence values are generated and `n_repeats` repetitions are requested.</span>
|
||||
|
||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
||||
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
|
||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
||||
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
|
||||
<span class="sd"> number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">__cache</span><span class="o">=</span><span class="p">{}</span>
|
||||
<span class="k">def</span> <span class="nf">__f</span><span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)</span> <span class="ow">in</span> <span class="n">__cache</span><span class="p">:</span> <span class="c1"># cached result</span>
|
||||
<span class="k">return</span> <span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span>
|
||||
<span class="k">if</span> <span class="n">nc</span><span class="o">==</span><span class="mi">1</span><span class="p">:</span> <span class="c1"># stop condition</span>
|
||||
<span class="k">return</span> <span class="mi">1</span>
|
||||
<span class="k">else</span><span class="p">:</span> <span class="c1"># recursive call</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">([</span><span class="n">__f</span><span class="p">(</span><span class="n">nc</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">-</span><span class="n">i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">np</span><span class="p">)])</span>
|
||||
<span class="n">__cache</span><span class="p">[(</span><span class="n">nc</span><span class="p">,</span><span class="n">np</span><span class="p">)]</span> <span class="o">=</span> <span class="n">x</span>
|
||||
<span class="k">return</span> <span class="n">x</span>
|
||||
<span class="k">return</span> <span class="n">__f</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_prevpoints</span><span class="p">)</span> <span class="o">*</span> <span class="n">n_repeats</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="num_prevalence_combinations"><a class="viewcode-back" href="../../quapy.html#quapy.functional.num_prevalence_combinations">[docs]</a><span class="k">def</span> <span class="nf">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Computes the number of valid prevalence combinations in the n_classes-dimensional simplex if `n_prevpoints` equally</span>
|
||||
<span class="sd"> distant prevalence values are generated and `n_repeats` repetitions are requested.</span>
|
||||
<span class="sd"> The computation comes down to calculating:</span>
|
||||
|
||||
<span class="sd"> .. math::</span>
|
||||
<span class="sd"> \\binom{N+C-1}{C-1} \\times r</span>
|
||||
|
||||
<span class="sd"> where `N` is `n_prevpoints-1`, i.e., the number of probability mass blocks to allocate, `C` is the number of</span>
|
||||
<span class="sd"> classes, and `r` is `n_repeats`. This solution comes from the</span>
|
||||
<span class="sd"> `Stars and Bars <https://brilliant.org/wiki/integer-equations-star-and-bars/>`_ problem.</span>
|
||||
|
||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
||||
<span class="sd"> :param n_prevpoints: integer, number of prevalence points.</span>
|
||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
||||
<span class="sd"> :return: The number of possible combinations. For example, if n_classes=2, n_prevpoints=5, n_repeats=1, then the</span>
|
||||
<span class="sd"> number of possible combinations is 5, i.e.: [0,1], [0.25,0.75], [0.50,0.50], [0.75,0.25], and [1.0,0.0]</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">N</span> <span class="o">=</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
|
||||
<span class="n">C</span> <span class="o">=</span> <span class="n">n_classes</span>
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="n">n_repeats</span>
|
||||
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">scipy</span><span class="o">.</span><span class="n">special</span><span class="o">.</span><span class="n">binom</span><span class="p">(</span><span class="n">N</span> <span class="o">+</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">C</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">r</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_nprevpoints_approximation"><a class="viewcode-back" href="../../quapy.html#quapy.functional.get_nprevpoints_approximation">[docs]</a><span class="k">def</span> <span class="nf">get_nprevpoints_approximation</span><span class="p">(</span><span class="n">combinations_budget</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">:</span><span class="nb">int</span><span class="o">=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Searches for the largest number of (equidistant) prevalence points to define for each of the `n_classes` classes so</span>
|
||||
<span class="sd"> that the number of valid prevalence values generated as combinations of prevalence points (points in a</span>
|
||||
<span class="sd"> `n_classes`-dimensional simplex) do not exceed combinations_budget.</span>
|
||||
|
||||
<span class="sd"> :param combinations_budget: integer, maximum number of combinations allowed</span>
|
||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
||||
<span class="sd"> :param n_repeats: integer, number of repetitions for each prevalence combination</span>
|
||||
<span class="sd"> :return: the largest number of prevalence points that generate no more than combinations_budget valid prevalences</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">n_classes</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">n_repeats</span> <span class="o">></span> <span class="mi">0</span> <span class="ow">and</span> <span class="n">combinations_budget</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'parameters must be positive integers'</span>
|
||||
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="n">combinations</span> <span class="o">=</span> <span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">combinations</span> <span class="o">></span> <span class="n">combinations_budget</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">n_prevpoints</span><span class="o">-</span><span class="mi">1</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">n_prevpoints</span> <span class="o">+=</span> <span class="mi">1</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="check_prevalence_vector"><a class="viewcode-back" href="../../quapy.html#quapy.functional.check_prevalence_vector">[docs]</a><span class="k">def</span> <span class="nf">check_prevalence_vector</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">raise_exception</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">toleranze</span><span class="o">=</span><span class="mf">1e-08</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Checks that p is a valid prevalence vector, i.e., that it contains values in [0,1] and that the values sum up to 1.</span>
|
||||
|
||||
<span class="sd"> :param p: the prevalence vector to check</span>
|
||||
<span class="sd"> :return: True if `p` is valid, False otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o">>=</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector contains negative numbers'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">p</span><span class="o"><=</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector contains values >1'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">isclose</span><span class="p">(</span><span class="n">p</span><span class="o">.</span><span class="n">sum</span><span class="p">(),</span> <span class="mi">1</span><span class="p">,</span> <span class="n">atol</span><span class="o">=</span><span class="n">toleranze</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">raise_exception</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'the prevalence vector does not sum up to 1'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
<span class="k">return</span> <span class="kc">True</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_divergence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.get_divergence">[docs]</a><span class="k">def</span> <span class="nf">get_divergence</span><span class="p">(</span><span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]):</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">'HD'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">HellingerDistance</span>
|
||||
<span class="k">elif</span> <span class="n">divergence</span><span class="o">==</span><span class="s1">'topsoe'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">TopsoeDistance</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unknown divergence </span><span class="si">{</span><span class="n">divergence</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">callable</span><span class="p">(</span><span class="n">divergence</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">divergence</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'argument "divergence" not understood; use a str or a callable function'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="argmin_prevalence"><a class="viewcode-back" href="../../quapy.html#quapy.functional.argmin_prevalence">[docs]</a><span class="k">def</span> <span class="nf">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'optim_minimize'</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'optim_minimize'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'linear_search'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'ternary_search'</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="optim_minimize"><a class="viewcode-back" href="../../quapy.html#quapy.functional.optim_minimize">[docs]</a><span class="k">def</span> <span class="nf">optim_minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Searches for the optimal prevalence values, i.e., an `n_classes`-dimensional vector of the (`n_classes`-1)-simplex</span>
|
||||
<span class="sd"> that yields the smallest loss. This optimization is carried out by means of a constrained search using scipy's</span>
|
||||
<span class="sd"> SLSQP routine.</span>
|
||||
|
||||
<span class="sd"> :param loss: (callable) the function to minimize</span>
|
||||
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
|
||||
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy</span> <span class="kn">import</span> <span class="n">optimize</span>
|
||||
|
||||
<span class="c1"># the initial point is set as the uniform distribution</span>
|
||||
<span class="n">uniform_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">full</span><span class="p">(</span><span class="n">fill_value</span><span class="o">=</span><span class="mi">1</span> <span class="o">/</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,))</span>
|
||||
|
||||
<span class="c1"># solutions are bounded to those contained in the unit-simplex</span>
|
||||
<span class="n">bounds</span> <span class="o">=</span> <span class="nb">tuple</span><span class="p">((</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">))</span> <span class="c1"># values in [0,1]</span>
|
||||
<span class="n">constraints</span> <span class="o">=</span> <span class="p">({</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'eq'</span><span class="p">,</span> <span class="s1">'fun'</span><span class="p">:</span> <span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="mi">1</span> <span class="o">-</span> <span class="nb">sum</span><span class="p">(</span><span class="n">x</span><span class="p">)})</span> <span class="c1"># values summing up to 1</span>
|
||||
<span class="n">r</span> <span class="o">=</span> <span class="n">optimize</span><span class="o">.</span><span class="n">minimize</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">x0</span><span class="o">=</span><span class="n">uniform_distribution</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'SLSQP'</span><span class="p">,</span> <span class="n">bounds</span><span class="o">=</span><span class="n">bounds</span><span class="p">,</span> <span class="n">constraints</span><span class="o">=</span><span class="n">constraints</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">r</span><span class="o">.</span><span class="n">x</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="linear_search"><a class="viewcode-back" href="../../quapy.html#quapy.functional.linear_search">[docs]</a><span class="k">def</span> <span class="nf">linear_search</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Performs a linear search for the best prevalence value in binary problems. The search is carried out by exploring</span>
|
||||
<span class="sd"> the range [0,1] stepping by 0.01. This search is inefficient, and is added only for completeness (some of the</span>
|
||||
<span class="sd"> early methods in quantification literature used it, e.g., HDy). A more powerful alternative is `optim_minimize`.</span>
|
||||
|
||||
<span class="sd"> :param loss: (callable) the function to minimize</span>
|
||||
<span class="sd"> :param n_classes: (int) the number of classes, i.e., the dimensionality of the prevalence vector</span>
|
||||
<span class="sd"> :return: (ndarray) the best prevalence vector found</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">n_classes</span><span class="o">==</span><span class="mi">2</span><span class="p">,</span> <span class="s1">'linear search is only available for binary problems'</span>
|
||||
|
||||
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
||||
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevalence_linspace</span><span class="p">(</span><span class="n">n_prevalences</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mf">0.0</span><span class="p">):</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">loss</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev</span><span class="p">,</span> <span class="n">prev</span><span class="p">]))</span>
|
||||
<span class="k">if</span> <span class="n">min_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o"><</span> <span class="n">min_score</span><span class="p">:</span>
|
||||
<span class="n">prev_selected</span><span class="p">,</span> <span class="n">min_score</span> <span class="o">=</span> <span class="n">prev</span><span class="p">,</span> <span class="n">score</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="mi">1</span> <span class="o">-</span> <span class="n">prev_selected</span><span class="p">,</span> <span class="n">prev_selected</span><span class="p">])</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,462 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method._kdey — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method._kdey</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method._kdey</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.neighbors</span> <span class="kn">import</span> <span class="n">KernelDensity</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">AggregativeSoftQuantifier</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">sklearn.metrics.pairwise</span> <span class="kn">import</span> <span class="n">rbf_kernel</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="KDEBase"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase">[docs]</a><span class="k">class</span> <span class="nc">KDEBase</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Common ancestor for KDE-based methods. Implements some common routines.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">BANDWIDTH_METHOD</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'scott'</span><span class="p">,</span> <span class="s1">'silverman'</span><span class="p">]</span>
|
||||
|
||||
<span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">_check_bandwidth</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Checks that the bandwidth parameter is correct</span>
|
||||
|
||||
<span class="sd"> :param bandwidth: either a string (see BANDWIDTH_METHOD) or a float</span>
|
||||
<span class="sd"> :return: nothing, but raises an exception for invalid values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">bandwidth</span> <span class="ow">in</span> <span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'invalid bandwidth, valid ones are </span><span class="si">{</span><span class="n">KDEBase</span><span class="o">.</span><span class="n">BANDWIDTH_METHOD</span><span class="si">}</span><span class="s1"> or float values'</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">bandwidth</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">"the bandwith for KDEy should be in (0,1), since this method models the unit simplex"</span>
|
||||
|
||||
<div class="viewcode-block" id="KDEBase.get_kde_function"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_kde_function">[docs]</a> <span class="k">def</span> <span class="nf">get_kde_function</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Wraps the KDE function from scikit-learn.</span>
|
||||
|
||||
<span class="sd"> :param X: data for which the density function is to be estimated</span>
|
||||
<span class="sd"> :param bandwidth: the bandwidth of the kernel</span>
|
||||
<span class="sd"> :return: a scikit-learn's KernelDensity object</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">KernelDensity</span><span class="p">(</span><span class="n">bandwidth</span><span class="o">=</span><span class="n">bandwidth</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="KDEBase.pdf"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.pdf">[docs]</a> <span class="k">def</span> <span class="nf">pdf</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">kde</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Wraps the density evaluation of scikit-learn's KDE. Scikit-learn returns log-scores (s), so this</span>
|
||||
<span class="sd"> function returns :math:`e^{s}`</span>
|
||||
|
||||
<span class="sd"> :param kde: a previously fit KDE function</span>
|
||||
<span class="sd"> :param X: the data for which the density is to be estimated</span>
|
||||
<span class="sd"> :return: np.ndarray with the densities</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="n">kde</span><span class="o">.</span><span class="n">score_samples</span><span class="p">(</span><span class="n">X</span><span class="p">))</span></div>
|
||||
|
||||
<div class="viewcode-block" id="KDEBase.get_mixture_components"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEBase.get_mixture_components">[docs]</a> <span class="k">def</span> <span class="nf">get_mixture_components</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">bandwidth</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns an array containing the mixture components, i.e., the KDE functions for each class.</span>
|
||||
|
||||
<span class="sd"> :param X: the data containing the covariates</span>
|
||||
<span class="sd"> :param y: the class labels</span>
|
||||
<span class="sd"> :param n_classes: integer, the number of classes</span>
|
||||
<span class="sd"> :param bandwidth: float, the bandwidth of the kernel</span>
|
||||
<span class="sd"> :return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">cat</span><span class="p">],</span> <span class="n">bandwidth</span><span class="p">)</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_classes</span><span class="p">)]</span></div></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="KDEyML"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML">[docs]</a><span class="k">class</span> <span class="nc">KDEyML</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Kullback-Leibler divergence (KLD) as</span>
|
||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
||||
<span class="sd"> the authors show that minimizing the distribution matching criterion for KLD is akin to performing</span>
|
||||
<span class="sd"> maximum likelihood (ML).</span>
|
||||
|
||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
||||
|
||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
||||
|
||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
||||
|
||||
<span class="sd"> In KDEy-ML, the divergence is taken to be the Kullback-Leibler Divergence. This is equivalent to solving:</span>
|
||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} -</span>
|
||||
<span class="sd"> \\mathbb{E}_{q_{\\widetilde{U}}} \\left[ \\log \\boldsymbol{p}_{\\alpha}(\\widetilde{x}) \\right]`</span>
|
||||
|
||||
<span class="sd"> which corresponds to the maximum likelihood estimate.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
||||
<span class="sd"> be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
||||
<span class="sd"> on which the predictions are to be generated.</span>
|
||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
||||
|
||||
<div class="viewcode-block" id="KDEyML.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="KDEyML.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyML.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that maximizes the likelihood</span>
|
||||
<span class="sd"> of the data (i.e., that minimizes the negative log-likelihood)</span>
|
||||
|
||||
<span class="sd"> :param posteriors: instances in the sample converted into posterior probabilities</span>
|
||||
<span class="sd"> :return: a vector of class prevalence estimates</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">RandomState</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
|
||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
|
||||
<span class="n">test_densities</span> <span class="o">=</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_i</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">neg_loglikelihood</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
||||
<span class="n">test_mixture_likelihood</span> <span class="o">=</span> <span class="nb">sum</span><span class="p">(</span><span class="n">prev_i</span> <span class="o">*</span> <span class="n">dens_i</span> <span class="k">for</span> <span class="n">prev_i</span><span class="p">,</span> <span class="n">dens_i</span> <span class="ow">in</span> <span class="nb">zip</span> <span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">test_densities</span><span class="p">))</span>
|
||||
<span class="n">test_loglikelihood</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">test_mixture_likelihood</span> <span class="o">+</span> <span class="n">epsilon</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">test_loglikelihood</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">neg_loglikelihood</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="KDEyHD"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD">[docs]</a><span class="k">class</span> <span class="nc">KDEyHD</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">,</span> <span class="n">KDEBase</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the squared Hellinger Distance (HD) as</span>
|
||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
||||
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
|
||||
|
||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
||||
|
||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
||||
|
||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
||||
|
||||
<span class="sd"> In KDEy-HD, the divergence is taken to be the squared Hellinger Distance, an f-divergence with corresponding</span>
|
||||
<span class="sd"> f-generator function given by:</span>
|
||||
|
||||
<span class="sd"> :math:`f(u)=(\\sqrt{u}-1)^2`</span>
|
||||
|
||||
<span class="sd"> The authors proposed a Monte Carlo solution that relies on importance sampling:</span>
|
||||
|
||||
<span class="sd"> :math:`\\hat{D}_f(p||q)= \\frac{1}{t} \\sum_{i=1}^t f\\left(\\frac{p(x_i)}{q(x_i)}\\right) \\frac{q(x_i)}{r(x_i)}`</span>
|
||||
|
||||
<span class="sd"> where the datapoints (trials) :math:`x_1,\\ldots,x_t\\sim_{\\mathrm{iid}} r` with :math:`r` the</span>
|
||||
<span class="sd"> uniform distribution.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
||||
<span class="sd"> be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
||||
<span class="sd"> on which the predictions are to be generated.</span>
|
||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
||||
<span class="sd"> :param montecarlo_trials: number of Monte Carlo trials (default 10000)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="nb">str</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span>
|
||||
<span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">montecarlo_trials</span><span class="o">=</span><span class="mi">10000</span><span class="p">):</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span> <span class="o">=</span> <span class="n">montecarlo_trials</span>
|
||||
|
||||
<div class="viewcode-block" id="KDEyHD.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_mixture_components</span><span class="p">(</span><span class="o">*</span><span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
|
||||
<span class="n">N</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">montecarlo_trials</span>
|
||||
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span>
|
||||
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">([</span><span class="n">kde_i</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">N</span><span class="o">//</span><span class="n">n</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">rs</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_i</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">kde_j</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span> <span class="k">for</span> <span class="n">kde_j</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> <span class="c1"># equiv. to (uniform @ self.reference_classwise_densities)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="KDEyHD.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyHD.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="c1"># we retain all n*N examples (sampled from a mixture with uniform parameter), and then</span>
|
||||
<span class="c1"># apply importance sampling (IS). In this version we compute D(p_alpha||q) with IS</span>
|
||||
<span class="n">n_classes</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mix_densities</span><span class="p">)</span>
|
||||
|
||||
<span class="n">test_kde</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">get_kde_function</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
<span class="n">test_densities</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">pdf</span><span class="p">(</span><span class="n">test_kde</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_samples</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">f_squared_hellinger</span><span class="p">(</span><span class="n">u</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">u</span><span class="p">)</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">**</span><span class="mi">2</span>
|
||||
|
||||
<span class="c1"># todo: this will fail when self.divergence is a callable, and is not the right place to do it anyway</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="o">==</span> <span class="s1">'hd'</span><span class="p">:</span>
|
||||
<span class="n">f</span> <span class="o">=</span> <span class="n">f_squared_hellinger</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'only squared HD is currently implemented'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">epsilon</span> <span class="o">=</span> <span class="mf">1e-10</span>
|
||||
<span class="n">qs</span> <span class="o">=</span> <span class="n">test_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
|
||||
<span class="n">rs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_density</span> <span class="o">+</span> <span class="n">epsilon</span>
|
||||
<span class="n">iw</span> <span class="o">=</span> <span class="n">qs</span><span class="o">/</span><span class="n">rs</span> <span class="c1">#importance weights</span>
|
||||
<span class="n">p_class</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">reference_classwise_densities</span> <span class="o">+</span> <span class="n">epsilon</span>
|
||||
<span class="n">fracs</span> <span class="o">=</span> <span class="n">p_class</span><span class="o">/</span><span class="n">qs</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
||||
<span class="c1"># ps / qs = (prev @ p_class) / qs = prev @ (p_class / qs) = prev @ fracs</span>
|
||||
<span class="n">ps_div_qs</span> <span class="o">=</span> <span class="n">prev</span> <span class="o">@</span> <span class="n">fracs</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span> <span class="n">f</span><span class="p">(</span><span class="n">ps_div_qs</span><span class="p">)</span> <span class="o">*</span> <span class="n">iw</span> <span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="KDEyCS"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS">[docs]</a><span class="k">class</span> <span class="nc">KDEyCS</span><span class="p">(</span><span class="n">AggregativeSoftQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Kernel Density Estimation model for quantification (KDEy) relying on the Cauchy-Schwarz divergence (CS) as</span>
|
||||
<span class="sd"> the divergence measure to be minimized. This method was first proposed in the paper</span>
|
||||
<span class="sd"> `Kernel Density Estimation for Multiclass Quantification <https://arxiv.org/abs/2401.00490>`_, in which</span>
|
||||
<span class="sd"> the authors proposed a Monte Carlo approach for minimizing the divergence.</span>
|
||||
|
||||
<span class="sd"> The distribution matching optimization problem comes down to solving:</span>
|
||||
|
||||
<span class="sd"> :math:`\\hat{\\alpha} = \\arg\\min_{\\alpha\\in\\Delta^{n-1}} \\mathcal{D}(\\boldsymbol{p}_{\\alpha}||q_{\\widetilde{U}})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_{\\alpha}` is the mixture of class-specific KDEs with mixture parameter (hence class prevalence)</span>
|
||||
<span class="sd"> :math:`\\alpha` defined by</span>
|
||||
|
||||
<span class="sd"> :math:`\\boldsymbol{p}_{\\alpha}(\\widetilde{x}) = \\sum_{i=1}^n \\alpha_i p_{\\widetilde{L}_i}(\\widetilde{x})`</span>
|
||||
|
||||
<span class="sd"> where :math:`p_X(\\boldsymbol{x}) = \\frac{1}{|X|} \\sum_{x_i\\in X} K\\left(\\frac{x-x_i}{h}\\right)` is the</span>
|
||||
<span class="sd"> KDE function that uses the datapoints in X as the kernel centers.</span>
|
||||
|
||||
<span class="sd"> In KDEy-CS, the divergence is taken to be the Cauchy-Schwarz divergence given by:</span>
|
||||
|
||||
<span class="sd"> :math:`\\mathcal{D}_{\\mathrm{CS}}(p||q)=-\\log\\left(\\frac{\\int p(x)q(x)dx}{\\sqrt{\\int p(x)^2dx \\int q(x)^2dx}}\\right)`</span>
|
||||
|
||||
<span class="sd"> The authors showed that this distribution matching admits a closed-form solution</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a binary classifier.</span>
|
||||
<span class="sd"> :param val_split: specifies the data used for generating classifier predictions. This specification</span>
|
||||
<span class="sd"> can be made as float in (0, 1) indicating the proportion of stratified held-out validation set to</span>
|
||||
<span class="sd"> be extracted from the training set; or as an integer (default 10), indicating that the predictions</span>
|
||||
<span class="sd"> are to be generated in a `k`-fold cross-validation manner (with this integer indicating the value</span>
|
||||
<span class="sd"> for `k`); or as a collection defining the specific set of data to use for validation.</span>
|
||||
<span class="sd"> Alternatively, this set can be specified at fit time by indicating the exact set of data</span>
|
||||
<span class="sd"> on which the predictions are to be generated.</span>
|
||||
<span class="sd"> :param bandwidth: float, the bandwidth of the Kernel</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">bandwidth</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="n">KDEBase</span><span class="o">.</span><span class="n">_check_bandwidth</span><span class="p">(</span><span class="n">bandwidth</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span> <span class="o">=</span> <span class="n">bandwidth</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
|
||||
<div class="viewcode-block" id="KDEyCS.gram_matrix_mix_sum"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.gram_matrix_mix_sum">[docs]</a> <span class="k">def</span> <span class="nf">gram_matrix_mix_sum</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="c1"># this adapts the output of the rbf_kernel function (pairwise evaluations of Gaussian kernels k(x,y))</span>
|
||||
<span class="c1"># to contain pairwise evaluations of N(x|mu,Sigma1+Sigma2) with mu=y and Sigma1 and Sigma2 are </span>
|
||||
<span class="c1"># two "scalar matrices" (h^2)*I each, so Sigma1+Sigma2 has scalar 2(h^2) (h is the bandwidth)</span>
|
||||
<span class="n">h</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">bandwidth</span>
|
||||
<span class="n">variance</span> <span class="o">=</span> <span class="mi">2</span> <span class="o">*</span> <span class="p">(</span><span class="n">h</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">nD</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">gamma</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="p">(</span><span class="mi">2</span><span class="o">*</span><span class="n">variance</span><span class="p">)</span>
|
||||
<span class="n">norm_factor</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(((</span><span class="mi">2</span><span class="o">*</span><span class="n">np</span><span class="o">.</span><span class="n">pi</span><span class="p">)</span><span class="o">**</span><span class="n">nD</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">variance</span><span class="o">**</span><span class="p">(</span><span class="n">nD</span><span class="p">)))</span>
|
||||
<span class="n">gram</span> <span class="o">=</span> <span class="n">norm_factor</span> <span class="o">*</span> <span class="n">rbf_kernel</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">Y</span><span class="p">,</span> <span class="n">gamma</span><span class="o">=</span><span class="n">gamma</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">gram</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
|
||||
|
||||
<div class="viewcode-block" id="KDEyCS.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
|
||||
<span class="n">P</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||||
<span class="n">n</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">))</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="n">n</span><span class="p">)),</span> \
|
||||
<span class="s1">'label name gaps not allowed in current implementation'</span>
|
||||
|
||||
<span class="c1"># counts_inv keeps track of the relative weight of each datapoint within its class</span>
|
||||
<span class="c1"># (i.e., the weight in its KDE model)</span>
|
||||
<span class="n">counts_inv</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">counts</span><span class="p">())</span>
|
||||
|
||||
<span class="c1"># tr_tr_sums corresponds to symbol \overline{B} in the paper</span>
|
||||
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n</span><span class="p">,</span><span class="n">n</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">i</span> <span class="o">></span> <span class="n">j</span><span class="p">:</span>
|
||||
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span><span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">j</span><span class="p">,</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">block</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">i</span><span class="p">],</span> <span class="n">P</span><span class="p">[</span><span class="n">y</span> <span class="o">==</span> <span class="n">j</span><span class="p">]</span> <span class="k">if</span> <span class="n">i</span><span class="o">!=</span><span class="n">j</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
<span class="n">tr_tr_sums</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span> <span class="o">=</span> <span class="n">block</span>
|
||||
|
||||
<span class="c1"># keep track of these data structures for the test phase</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span> <span class="o">=</span> <span class="n">P</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ytr</span> <span class="o">=</span> <span class="n">y</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="n">tr_tr_sums</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span> <span class="o">=</span> <span class="n">counts_inv</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="KDEyCS.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._kdey.KDEyCS.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="n">Ptr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">Ptr</span>
|
||||
<span class="n">Pte</span> <span class="o">=</span> <span class="n">posteriors</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">ytr</span>
|
||||
<span class="n">tr_tr_sums</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_tr_sums</span>
|
||||
|
||||
<span class="n">M</span><span class="p">,</span> <span class="n">nD</span> <span class="o">=</span> <span class="n">Pte</span><span class="o">.</span><span class="n">shape</span>
|
||||
<span class="n">Minv</span> <span class="o">=</span> <span class="p">(</span><span class="mi">1</span><span class="o">/</span><span class="n">M</span><span class="p">)</span> <span class="c1"># t in the paper</span>
|
||||
<span class="n">n</span> <span class="o">=</span> <span class="n">Ptr</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
|
||||
<span class="c1"># becomes a constant that does not affect the optimization, no need to compute it</span>
|
||||
<span class="c1"># partC = 0.5*np.log(self.gram_matrix_mix_sum(Pte) * Kinv * Kinv)</span>
|
||||
|
||||
<span class="c1"># tr_te_sums corresponds to \overline{a}*(1/Li)*(1/M) in the paper (note the constants</span>
|
||||
<span class="c1"># are already aggregated to tr_te_sums, so these multiplications are not carried out</span>
|
||||
<span class="c1"># at each iteration of the optimization phase)</span>
|
||||
<span class="n">tr_te_sums</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
|
||||
<span class="n">tr_te_sums</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">gram_matrix_mix_sum</span><span class="p">(</span><span class="n">Ptr</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">i</span><span class="p">],</span> <span class="n">Pte</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">divergence</span><span class="p">(</span><span class="n">alpha</span><span class="p">):</span>
|
||||
<span class="c1"># called \overline{r} in the paper</span>
|
||||
<span class="n">alpha_ratio</span> <span class="o">=</span> <span class="n">alpha</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">counts_inv</span>
|
||||
|
||||
<span class="c1"># recall that tr_te_sums already accounts for the constant terms (1/Li)*(1/M)</span>
|
||||
<span class="n">partA</span> <span class="o">=</span> <span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">((</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_te_sums</span><span class="p">)</span> <span class="o">*</span> <span class="n">Minv</span><span class="p">)</span>
|
||||
<span class="n">partB</span> <span class="o">=</span> <span class="mf">0.5</span> <span class="o">*</span> <span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">alpha_ratio</span> <span class="o">@</span> <span class="n">tr_tr_sums</span> <span class="o">@</span> <span class="n">alpha_ratio</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">partA</span> <span class="o">+</span> <span class="n">partB</span> <span class="c1">#+ partC</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">optim_minimize</span><span class="p">(</span><span class="n">divergence</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span></div></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,520 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method._neural — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method._neural</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method._neural</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
||||
<span class="kn">import</span> <span class="nn">random</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">torch</span>
|
||||
<span class="kn">from</span> <span class="nn">torch.nn</span> <span class="kn">import</span> <span class="n">MSELoss</span>
|
||||
<span class="kn">from</span> <span class="nn">torch.nn.functional</span> <span class="kn">import</span> <span class="n">relu</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">UPP</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">EarlyStop</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer">[docs]</a><span class="k">class</span> <span class="nc">QuaNetTrainer</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implementation of `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_, a neural network for</span>
|
||||
<span class="sd"> quantification. This implementation uses `PyTorch <https://pytorch.org/>`_ and can take advantage of GPU</span>
|
||||
<span class="sd"> for speeding-up the training phase.</span>
|
||||
|
||||
<span class="sd"> Example:</span>
|
||||
|
||||
<span class="sd"> >>> import quapy as qp</span>
|
||||
<span class="sd"> >>> from quapy.method.meta import QuaNet</span>
|
||||
<span class="sd"> >>> from quapy.classification.neural import NeuralClassifierTrainer, CNNnet</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> # use samples of 100 elements</span>
|
||||
<span class="sd"> >>> qp.environ['SAMPLE_SIZE'] = 100</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> # load the kindle dataset as text, and convert words to numerical indexes</span>
|
||||
<span class="sd"> >>> dataset = qp.datasets.fetch_reviews('kindle', pickle=True)</span>
|
||||
<span class="sd"> >>> qp.train.preprocessing.index(dataset, min_df=5, inplace=True)</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> # the text classifier is a CNN trained by NeuralClassifierTrainer</span>
|
||||
<span class="sd"> >>> cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)</span>
|
||||
<span class="sd"> >>> classifier = NeuralClassifierTrainer(cnn, device='cuda')</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> # train QuaNet (QuaNet is an alias to QuaNetTrainer)</span>
|
||||
<span class="sd"> >>> model = QuaNet(classifier, qp.environ['SAMPLE_SIZE'], device='cuda')</span>
|
||||
<span class="sd"> >>> model.fit(dataset.training)</span>
|
||||
<span class="sd"> >>> estim_prevalence = model.quantify(dataset.test.instances)</span>
|
||||
|
||||
<span class="sd"> :param classifier: an object implementing `fit` (i.e., that can be trained on labelled data),</span>
|
||||
<span class="sd"> `predict_proba` (i.e., that can generate posterior probabilities of unlabelled examples) and</span>
|
||||
<span class="sd"> `transform` (i.e., that can generate embedded representations of the unlabelled instances).</span>
|
||||
<span class="sd"> :param sample_size: integer, the sample size; default is None, meaning that the sample size should be</span>
|
||||
<span class="sd"> taken from qp.environ["SAMPLE_SIZE"]</span>
|
||||
<span class="sd"> :param n_epochs: integer, maximum number of training epochs</span>
|
||||
<span class="sd"> :param tr_iter_per_poch: integer, number of training iterations before considering an epoch complete</span>
|
||||
<span class="sd"> :param va_iter_per_poch: integer, number of validation iterations to perform after each epoch</span>
|
||||
<span class="sd"> :param lr: float, the learning rate</span>
|
||||
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cells</span>
|
||||
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
|
||||
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
|
||||
<span class="sd"> quantification embedding</span>
|
||||
<span class="sd"> :param bidirectional: boolean, indicates whether the LSTM is bidirectional or not</span>
|
||||
<span class="sd"> :param qdrop_p: float, dropout probability</span>
|
||||
<span class="sd"> :param patience: integer, number of epochs showing no improvement in the validation set before stopping the</span>
|
||||
<span class="sd"> training phase (early stopping)</span>
|
||||
<span class="sd"> :param checkpointdir: string, a path where to store models' checkpoints</span>
|
||||
<span class="sd"> :param checkpointname: string (optional), the name of the model's checkpoint</span>
|
||||
<span class="sd"> :param device: string, indicate "cpu" or "cuda"</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">classifier</span><span class="p">,</span>
|
||||
<span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">n_epochs</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
||||
<span class="n">tr_iter_per_poch</span><span class="o">=</span><span class="mi">500</span><span class="p">,</span>
|
||||
<span class="n">va_iter_per_poch</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
|
||||
<span class="n">lr</span><span class="o">=</span><span class="mf">1e-3</span><span class="p">,</span>
|
||||
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
||||
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
|
||||
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
|
||||
<span class="n">patience</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
||||
<span class="n">checkpointdir</span><span class="o">=</span><span class="s1">'../checkpoint'</span><span class="p">,</span>
|
||||
<span class="n">checkpointname</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">):</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">'transform'</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce document embeddings '</span> \
|
||||
<span class="sa">f</span><span class="s1">'since it does not implement the method "transform"'</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="s1">'predict_proba'</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'the classifier </span><span class="si">{</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> does not seem to be able to produce posterior probabilities '</span> \
|
||||
<span class="sa">f</span><span class="s1">'since it does not implement the method "predict_proba"'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span> <span class="o">=</span> <span class="n">n_epochs</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span> <span class="o">=</span> <span class="n">tr_iter_per_poch</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span> <span class="o">=</span> <span class="n">va_iter_per_poch</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">lr</span> <span class="o">=</span> <span class="n">lr</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'lstm_hidden_size'</span><span class="p">:</span> <span class="n">lstm_hidden_size</span><span class="p">,</span>
|
||||
<span class="s1">'lstm_nlayers'</span><span class="p">:</span> <span class="n">lstm_nlayers</span><span class="p">,</span>
|
||||
<span class="s1">'ff_layers'</span><span class="p">:</span> <span class="n">ff_layers</span><span class="p">,</span>
|
||||
<span class="s1">'bidirectional'</span><span class="p">:</span> <span class="n">bidirectional</span><span class="p">,</span>
|
||||
<span class="s1">'qdrop_p'</span><span class="p">:</span> <span class="n">qdrop_p</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
|
||||
<span class="k">if</span> <span class="n">checkpointname</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">local_random</span> <span class="o">=</span> <span class="n">random</span><span class="o">.</span><span class="n">Random</span><span class="p">()</span>
|
||||
<span class="n">random_code</span> <span class="o">=</span> <span class="s1">'-'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">local_random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1000000</span><span class="p">))</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">))</span>
|
||||
<span class="n">checkpointname</span> <span class="o">=</span> <span class="s1">'QuaNet-'</span><span class="o">+</span><span class="n">random_code</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span> <span class="o">=</span> <span class="n">checkpointdir</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">checkpointname</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="n">device</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">())</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Trains QuaNet.</span>
|
||||
|
||||
<span class="sd"> :param data: the training data on which to train QuaNet. If `fit_classifier=True`, the data will be split in</span>
|
||||
<span class="sd"> 40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If</span>
|
||||
<span class="sd"> `fit_classifier=False`, the data will be split in 66/34 for training QuaNet and validating it, respectively.</span>
|
||||
<span class="sd"> :param fit_classifier: if True, trains the classifier on a split containing 40% of the data</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">fit_classifier</span><span class="p">:</span>
|
||||
<span class="n">classifier_data</span><span class="p">,</span> <span class="n">unused_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.4</span><span class="p">)</span>
|
||||
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">unused_data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span> <span class="c1"># 0.66 split of 60% makes 40% and 20%</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="o">*</span><span class="n">classifier_data</span><span class="o">.</span><span class="n">Xy</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">classifier_data</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="n">train_data</span><span class="p">,</span> <span class="n">valid_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.66</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># estimate the hard and soft stats tpr and fpr of the classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tr_prev</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># compute the posterior probabilities of the instances</span>
|
||||
<span class="n">valid_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">train_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># turn instances' original representations into embeddings</span>
|
||||
<span class="n">valid_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">valid_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">valid_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
|
||||
<span class="n">train_data_embed</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">train_data</span><span class="o">.</span><span class="n">instances</span><span class="p">),</span> <span class="n">train_data</span><span class="o">.</span><span class="n">labels</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'cc'</span><span class="p">:</span> <span class="n">CC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
|
||||
<span class="s1">'acc'</span><span class="p">:</span> <span class="n">ACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
|
||||
<span class="s1">'pcc'</span><span class="p">:</span> <span class="n">PCC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
|
||||
<span class="s1">'pacc'</span><span class="p">:</span> <span class="n">PACC</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="kc">None</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">valid_data</span><span class="p">),</span>
|
||||
<span class="p">}</span>
|
||||
<span class="k">if</span> <span class="n">classifier_data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">[</span><span class="s1">'emq'</span><span class="p">]</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">classifier_data</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="p">{</span>
|
||||
<span class="s1">'tr-loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'va-loss'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'tr-mae'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="s1">'va-mae'</span><span class="p">:</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="p">}</span>
|
||||
|
||||
<span class="n">nQ</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="p">)</span>
|
||||
<span class="n">nC</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span> <span class="o">=</span> <span class="n">QuaNetModule</span><span class="p">(</span>
|
||||
<span class="n">doc_embedding_size</span><span class="o">=</span><span class="n">train_data_embed</span><span class="o">.</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
|
||||
<span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span>
|
||||
<span class="n">stats_size</span><span class="o">=</span><span class="n">nQ</span><span class="o">*</span><span class="n">nC</span><span class="p">,</span>
|
||||
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span> <span class="k">if</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span> <span class="k">else</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span>
|
||||
<span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">Adam</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">parameters</span><span class="p">(),</span> <span class="n">lr</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">lr</span><span class="p">)</span>
|
||||
<span class="n">early_stop</span> <span class="o">=</span> <span class="n">EarlyStop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">checkpoint</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">epoch_i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">n_epochs</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">train_data_embed</span><span class="p">,</span> <span class="n">train_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tr_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_epoch</span><span class="p">(</span><span class="n">valid_data_embed</span><span class="p">,</span> <span class="n">valid_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">va_iter</span><span class="p">,</span> <span class="n">epoch_i</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">early_stop</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-loss'</span><span class="p">],</span> <span class="n">epoch_i</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
||||
<span class="n">torch</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">state_dict</span><span class="p">(),</span> <span class="n">checkpoint</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">early_stop</span><span class="o">.</span><span class="n">STOP</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'training ended by patience exhausted; loading best model parameters in </span><span class="si">{</span><span class="n">checkpoint</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'for epoch </span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">best_epoch</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">load_state_dict</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">checkpoint</span><span class="p">))</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_aggregative_estims</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">):</span>
|
||||
<span class="n">label_predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="n">posteriors</span><span class="p">,</span> <span class="n">axis</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">prevs_estim</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">quantifier</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quantifiers</span><span class="o">.</span><span class="n">values</span><span class="p">():</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">posteriors</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">)</span> <span class="k">else</span> <span class="n">label_predictions</span>
|
||||
<span class="n">prevs_estim</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">quantifier</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">predictions</span><span class="p">))</span>
|
||||
|
||||
<span class="c1"># there is no real need for adding static estims like the TPR or FPR from training since those are constant</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">prevs_estim</span>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">embeddings</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">posteriors</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">embeddings</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span> <span class="o">==</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cuda'</span><span class="p">):</span>
|
||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">cpu</span><span class="p">()</span>
|
||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">prevalence</span><span class="o">.</span><span class="n">numpy</span><span class="p">()</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">prevalence</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_epoch</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">iterations</span><span class="p">,</span> <span class="n">epoch</span><span class="p">,</span> <span class="n">early_stop</span><span class="p">,</span> <span class="n">train</span><span class="p">):</span>
|
||||
<span class="n">mse_loss</span> <span class="o">=</span> <span class="n">MSELoss</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">mode</span><span class="o">=</span><span class="n">train</span><span class="p">)</span>
|
||||
<span class="n">losses</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">mae_errors</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">sampler</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span>
|
||||
<span class="n">data</span><span class="p">,</span>
|
||||
<span class="n">sample_size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span>
|
||||
<span class="n">repeats</span><span class="o">=</span><span class="n">iterations</span><span class="p">,</span>
|
||||
<span class="n">random_state</span><span class="o">=</span><span class="kc">None</span> <span class="k">if</span> <span class="n">train</span> <span class="k">else</span> <span class="mi">0</span> <span class="c1"># different samples during train, same samples during validation</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">pbar</span> <span class="o">=</span> <span class="n">tqdm</span><span class="p">(</span><span class="n">sampler</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">(),</span> <span class="n">total</span><span class="o">=</span><span class="n">sampler</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
|
||||
<span class="k">for</span> <span class="n">it</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">pbar</span><span class="p">):</span>
|
||||
<span class="n">sample_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||
<span class="n">sample_posteriors</span> <span class="o">=</span> <span class="n">posteriors</span><span class="p">[</span><span class="n">index</span><span class="p">]</span>
|
||||
<span class="n">quant_estims</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_aggregative_estims</span><span class="p">(</span><span class="n">sample_posteriors</span><span class="p">)</span>
|
||||
<span class="n">ptrue</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">([</span><span class="n">sample_data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">zero_grad</span><span class="p">()</span>
|
||||
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
||||
<span class="n">loss</span><span class="o">.</span><span class="n">backward</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">optim</span><span class="o">.</span><span class="n">step</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
|
||||
<span class="n">phat</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet</span><span class="o">.</span><span class="n">forward</span><span class="p">(</span><span class="n">sample_data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">sample_posteriors</span><span class="p">,</span> <span class="n">quant_estims</span><span class="p">)</span>
|
||||
<span class="n">loss</span> <span class="o">=</span> <span class="n">mse_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">mae_loss</span><span class="p">(</span><span class="n">phat</span><span class="p">,</span> <span class="n">ptrue</span><span class="p">)</span>
|
||||
|
||||
<span class="n">losses</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">loss</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
||||
<span class="n">mae_errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="o">.</span><span class="n">item</span><span class="p">())</span>
|
||||
|
||||
<span class="n">mse</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">losses</span><span class="p">)</span>
|
||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">mae_errors</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr-loss'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'tr-mae'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-loss'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mse</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s1">'va-mae'</span><span class="p">]</span> <span class="o">=</span> <span class="n">mae</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">train</span><span class="p">:</span>
|
||||
<span class="n">pbar</span><span class="o">.</span><span class="n">set_description</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[QuaNet] '</span>
|
||||
<span class="sa">f</span><span class="s1">'epoch=</span><span class="si">{</span><span class="n">epoch</span><span class="si">}</span><span class="s1"> [it=</span><span class="si">{</span><span class="n">it</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">iterations</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1">'</span>
|
||||
<span class="sa">f</span><span class="s1">'tr-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr-loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> tr-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"tr-mae"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="se">\t</span><span class="s1">'</span>
|
||||
<span class="sa">f</span><span class="s1">'val-mseloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va-loss"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> val-maeloss=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="p">[</span><span class="s2">"va-mae"</span><span class="p">]</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> '</span>
|
||||
<span class="sa">f</span><span class="s1">'patience=</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">patience</span><span class="si">}</span><span class="s1">/</span><span class="si">{</span><span class="n">early_stop</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="n">classifier_params</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">()</span>
|
||||
<span class="n">classifier_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__'</span><span class="o">+</span><span class="n">k</span><span class="p">:</span><span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span><span class="n">v</span> <span class="ow">in</span> <span class="n">classifier_params</span><span class="o">.</span><span class="n">items</span><span class="p">()}</span>
|
||||
<span class="k">return</span> <span class="p">{</span><span class="o">**</span><span class="n">classifier_params</span><span class="p">,</span> <span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">}</span></div>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
||||
<span class="n">learner_params</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">parameters</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">quanet_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
|
||||
<span class="k">elif</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">):</span>
|
||||
<span class="n">learner_params</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">,</span> <span class="s1">''</span><span class="p">)]</span> <span class="o">=</span> <span class="n">val</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'unknown parameter '</span><span class="p">,</span> <span class="n">key</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">learner_params</span><span class="p">)</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__check_params_colision</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quanet_params</span><span class="p">,</span> <span class="n">learner_params</span><span class="p">):</span>
|
||||
<span class="n">quanet_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">quanet_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
||||
<span class="n">learner_keys</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">learner_params</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
||||
<span class="n">intersection</span> <span class="o">=</span> <span class="n">quanet_keys</span><span class="o">.</span><span class="n">intersection</span><span class="p">(</span><span class="n">learner_keys</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">intersection</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the use of parameters </span><span class="si">{</span><span class="n">intersection</span><span class="si">}</span><span class="s1"> is ambiguous sine those can refer to '</span>
|
||||
<span class="sa">f</span><span class="s1">'the parameters of QuaNet or the learner </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">classifier</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint">[docs]</a> <span class="k">def</span> <span class="nf">clean_checkpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Removes the checkpoint</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpoint</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetTrainer.clean_checkpoint_dir"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetTrainer.clean_checkpoint_dir">[docs]</a> <span class="k">def</span> <span class="nf">clean_checkpoint_dir</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Removes anything contained in the checkpoint directory</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="kn">import</span> <span class="nn">shutil</span>
|
||||
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">checkpointdir</span><span class="p">,</span> <span class="n">ignore_errors</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mae_loss"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.mae_loss">[docs]</a><span class="k">def</span> <span class="nf">mae_loss</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">target</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Torch-like wrapper for the Mean Absolute Error</span>
|
||||
|
||||
<span class="sd"> :param output: predictions</span>
|
||||
<span class="sd"> :param target: ground truth values</span>
|
||||
<span class="sd"> :return: mean absolute error loss</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">output</span> <span class="o">-</span> <span class="n">target</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="QuaNetModule"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule">[docs]</a><span class="k">class</span> <span class="nc">QuaNetModule</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Module</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements the `QuaNet <https://dl.acm.org/doi/abs/10.1145/3269206.3269287>`_ forward pass.</span>
|
||||
<span class="sd"> See :class:`QuaNetTrainer` for training QuaNet.</span>
|
||||
|
||||
<span class="sd"> :param doc_embedding_size: integer, the dimensionality of the document embeddings</span>
|
||||
<span class="sd"> :param n_classes: integer, number of classes</span>
|
||||
<span class="sd"> :param stats_size: integer, number of statistics estimated by simple quantification methods</span>
|
||||
<span class="sd"> :param lstm_hidden_size: integer, hidden dimensionality of the LSTM cell</span>
|
||||
<span class="sd"> :param lstm_nlayers: integer, number of LSTM layers</span>
|
||||
<span class="sd"> :param ff_layers: list of integers, dimensions of the densely-connected FF layers on top of the</span>
|
||||
<span class="sd"> quantification embedding</span>
|
||||
<span class="sd"> :param bidirectional: boolean, whether or not to use bidirectional LSTM</span>
|
||||
<span class="sd"> :param qdrop_p: float, dropout probability</span>
|
||||
<span class="sd"> :param order_by: integer, class for which the document embeddings are to be sorted</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">doc_embedding_size</span><span class="p">,</span>
|
||||
<span class="n">n_classes</span><span class="p">,</span>
|
||||
<span class="n">stats_size</span><span class="p">,</span>
|
||||
<span class="n">lstm_hidden_size</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
|
||||
<span class="n">lstm_nlayers</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">ff_layers</span><span class="o">=</span><span class="p">[</span><span class="mi">1024</span><span class="p">,</span> <span class="mi">512</span><span class="p">],</span>
|
||||
<span class="n">bidirectional</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">qdrop_p</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span>
|
||||
<span class="n">order_by</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
||||
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_classes</span> <span class="o">=</span> <span class="n">n_classes</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="o">=</span> <span class="n">order_by</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">=</span> <span class="n">lstm_hidden_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">=</span> <span class="n">lstm_nlayers</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="o">=</span> <span class="n">bidirectional</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span> <span class="o">=</span> <span class="n">qdrop_p</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">LSTM</span><span class="p">(</span><span class="n">doc_embedding_size</span> <span class="o">+</span> <span class="n">n_classes</span><span class="p">,</span> <span class="c1"># +n_classes stands for the posterior probs. (concatenated)</span>
|
||||
<span class="n">lstm_hidden_size</span><span class="p">,</span> <span class="n">lstm_nlayers</span><span class="p">,</span> <span class="n">bidirectional</span><span class="o">=</span><span class="n">bidirectional</span><span class="p">,</span>
|
||||
<span class="n">dropout</span><span class="o">=</span><span class="n">qdrop_p</span><span class="p">,</span> <span class="n">batch_first</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">qdrop_p</span><span class="p">)</span>
|
||||
|
||||
<span class="n">lstm_output_size</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span>
|
||||
<span class="n">ff_input_size</span> <span class="o">=</span> <span class="n">lstm_output_size</span> <span class="o">+</span> <span class="n">stats_size</span>
|
||||
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">ff_input_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">lin_size</span> <span class="ow">in</span> <span class="n">ff_layers</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">lin_size</span><span class="p">))</span>
|
||||
<span class="n">prev_size</span> <span class="o">=</span> <span class="n">lin_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">output</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Linear</span><span class="p">(</span><span class="n">prev_size</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">device</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cuda'</span><span class="p">)</span> <span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span> <span class="k">else</span> <span class="n">torch</span><span class="o">.</span><span class="n">device</span><span class="p">(</span><span class="s1">'cpu'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_init_hidden</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">directions</span> <span class="o">=</span> <span class="mi">2</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">bidirectional</span> <span class="k">else</span> <span class="mi">1</span>
|
||||
<span class="n">var_hidden</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
||||
<span class="n">var_cell</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span> <span class="o">*</span> <span class="n">directions</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">next</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">parameters</span><span class="p">())</span><span class="o">.</span><span class="n">is_cuda</span><span class="p">:</span>
|
||||
<span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span> <span class="o">=</span> <span class="n">var_hidden</span><span class="o">.</span><span class="n">cuda</span><span class="p">(),</span> <span class="n">var_cell</span><span class="o">.</span><span class="n">cuda</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">var_hidden</span><span class="p">,</span> <span class="n">var_cell</span>
|
||||
|
||||
<div class="viewcode-block" id="QuaNetModule.forward"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._neural.QuaNetModule.forward">[docs]</a> <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">statistics</span><span class="p">):</span>
|
||||
<span class="n">device</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">device</span>
|
||||
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
||||
<span class="n">statistics</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">as_tensor</span><span class="p">(</span><span class="n">statistics</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="n">device</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">doc_posteriors</span><span class="p">[:,</span> <span class="bp">self</span><span class="o">.</span><span class="n">order_by</span><span class="p">])</span>
|
||||
<span class="n">doc_embeddings</span> <span class="o">=</span> <span class="n">doc_embeddings</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
|
||||
<span class="n">doc_posteriors</span> <span class="o">=</span> <span class="n">doc_posteriors</span><span class="p">[</span><span class="n">order</span><span class="p">]</span>
|
||||
|
||||
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">doc_embeddings</span><span class="p">,</span> <span class="n">doc_posteriors</span><span class="p">),</span> <span class="n">dim</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># the entire set represents only one instance in quapy contexts, and so the batch_size=1</span>
|
||||
<span class="c1"># the shape should be (1, number-of-instances, embedding-size + n_classes)</span>
|
||||
<span class="n">embeded_posteriors</span> <span class="o">=</span> <span class="n">embeded_posteriors</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="o">.</span><span class="n">flatten_parameters</span><span class="p">()</span>
|
||||
<span class="n">_</span><span class="p">,</span> <span class="p">(</span><span class="n">rnn_hidden</span><span class="p">,</span><span class="n">_</span><span class="p">)</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">lstm</span><span class="p">(</span><span class="n">embeded_posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_init_hidden</span><span class="p">())</span>
|
||||
<span class="n">rnn_hidden</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nlayers</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">ndirections</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">hidden_size</span><span class="p">)</span>
|
||||
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">rnn_hidden</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">quant_embedding</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">quant_embedding</span><span class="p">,</span> <span class="n">statistics</span><span class="p">))</span>
|
||||
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="n">quant_embedding</span><span class="o">.</span><span class="n">unsqueeze</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">linear</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ff_layers</span><span class="p">:</span>
|
||||
<span class="n">abstracted</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">relu</span><span class="p">(</span><span class="n">linear</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)))</span>
|
||||
|
||||
<span class="n">logits</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">output</span><span class="p">(</span><span class="n">abstracted</span><span class="p">)</span><span class="o">.</span><span class="n">view</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">prevalence</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">prevalence</span></div></div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,364 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method._threshold_optim — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method._threshold_optim</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method._threshold_optim</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">abstractmethod</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BinaryAggregativeQuantifier</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization">[docs]</a><span class="k">class</span> <span class="nc">ThresholdOptimization</span><span class="p">(</span><span class="n">BinaryAggregativeQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract class of Threshold Optimization variants for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
<span class="sd"> The different variants are based on different heuristics for choosing a decision threshold</span>
|
||||
<span class="sd"> that would allow for more true positives and many more false positives, on the grounds this</span>
|
||||
<span class="sd"> would deliver larger denominators.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">classifier</span> <span class="o">=</span> <span class="n">classifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.condition">[docs]</a> <span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements the criterion according to which the threshold should be selected.</span>
|
||||
<span class="sd"> This function should return the (float) score to be minimized.</span>
|
||||
|
||||
<span class="sd"> :param tpr: float, true positive rate</span>
|
||||
<span class="sd"> :param fpr: float, false positive rate</span>
|
||||
<span class="sd"> :return: float, a score for the given `tpr` and `fpr`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization.discard"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.discard">[docs]</a> <span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Indicates whether a combination of tpr and fpr should be discarded</span>
|
||||
|
||||
<span class="sd"> :param tpr: float, true positive rate</span>
|
||||
<span class="sd"> :param fpr: float, false positive rate</span>
|
||||
<span class="sd"> :return: true if the combination is to be discarded, false otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_eval_candidate_thresholds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Seeks for the best `tpr` and `fpr` according to the score obtained at different</span>
|
||||
<span class="sd"> decision thresholds. The scoring function is implemented in function `_condition`.</span>
|
||||
|
||||
<span class="sd"> :param decision_scores: array-like with the classification scores</span>
|
||||
<span class="sd"> :param y: predicted labels for the validation set (or for the training set via `k`-fold cross validation)</span>
|
||||
<span class="sd"> :return: best `tpr` and `fpr` and `threshold` according to `_condition`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">candidate_thresholds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">)</span>
|
||||
|
||||
<span class="n">candidates</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">candidate_threshold</span> <span class="ow">in</span> <span class="n">candidate_thresholds</span><span class="p">:</span>
|
||||
<span class="n">y_</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span><span class="p">[</span><span class="mi">1</span> <span class="o">*</span> <span class="p">(</span><span class="n">decision_scores</span> <span class="o">>=</span> <span class="n">candidate_threshold</span><span class="p">)]</span>
|
||||
<span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_table</span><span class="p">(</span><span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">)</span>
|
||||
<span class="n">tpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_tpr</span><span class="p">(</span><span class="n">TP</span><span class="p">,</span> <span class="n">FN</span><span class="p">)</span>
|
||||
<span class="n">fpr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_fpr</span><span class="p">(</span><span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">discard</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">):</span>
|
||||
<span class="n">candidate_score</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">condition</span><span class="p">(</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span>
|
||||
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">candidate_threshold</span><span class="p">])</span>
|
||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">candidate_score</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="c1"># if no candidate gives rise to a valid combination of tpr and fpr, this method defaults to the standard</span>
|
||||
<span class="c1"># classify & count; this is akin to assign tpr=1, fpr=0, threshold=0</span>
|
||||
<span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span>
|
||||
<span class="n">candidates</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">,</span> <span class="n">threshold</span><span class="p">])</span>
|
||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">candidates</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">candidates</span><span class="p">)</span>
|
||||
<span class="n">candidates</span> <span class="o">=</span> <span class="n">candidates</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)]</span> <span class="c1"># sort candidates by candidate_score</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">candidates</span>
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization.aggregate_with_threshold"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate_with_threshold">[docs]</a> <span class="k">def</span> <span class="nf">aggregate_with_threshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">,</span> <span class="n">tprs</span><span class="p">,</span> <span class="n">fprs</span><span class="p">,</span> <span class="n">thresholds</span><span class="p">):</span>
|
||||
<span class="c1"># This function performs the adjusted count for given tpr, fpr, and threshold.</span>
|
||||
<span class="c1"># Note that, due to broadcasting, tprs, fprs, and thresholds could be arrays of length > 1</span>
|
||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">[:,</span> <span class="kc">None</span><span class="p">]</span> <span class="o">>=</span> <span class="n">thresholds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="p">(</span><span class="n">prevs_estims</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span><span class="n">tprs</span> <span class="o">-</span> <span class="n">fprs</span><span class="p">)</span>
|
||||
<span class="n">prevs_estims</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">as_binary_prevalence</span><span class="p">(</span><span class="n">prevs_estims</span><span class="p">,</span> <span class="n">clip_if_necessary</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">prevs_estims</span><span class="o">.</span><span class="n">squeeze</span><span class="p">()</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_compute_table</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">y_</span><span class="p">):</span>
|
||||
<span class="n">TP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">FP</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">FN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">!=</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">pos_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">TN</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="n">y</span> <span class="o">==</span> <span class="n">y_</span><span class="p">,</span> <span class="n">y</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">neg_label</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">FN</span><span class="p">,</span> <span class="n">TN</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_compute_tpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">TP</span><span class="p">,</span> <span class="n">FP</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="mi">1</span>
|
||||
<span class="k">return</span> <span class="n">TP</span> <span class="o">/</span> <span class="p">(</span><span class="n">TP</span> <span class="o">+</span> <span class="n">FP</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_compute_fpr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">FP</span><span class="p">,</span> <span class="n">TN</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="mi">0</span>
|
||||
<span class="k">return</span> <span class="n">FP</span> <span class="o">/</span> <span class="p">(</span><span class="n">FP</span> <span class="o">+</span> <span class="n">TN</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||||
<span class="c1"># the standard behavior is to keep the best threshold only</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ThresholdOptimization.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.ThresholdOptimization.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="c1"># the standard behavior is to compute the adjusted count using the best threshold found</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">threshold</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="T50"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50">[docs]</a><span class="k">class</span> <span class="nc">T50</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
||||
<span class="sd"> for the threshold that makes `tpr` closest to 0.5.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="T50.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.T50.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="n">tpr</span> <span class="o">-</span> <span class="mf">0.5</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MAX"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX">[docs]</a><span class="k">class</span> <span class="nc">MAX</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
||||
<span class="sd"> for the threshold that maximizes `tpr-fpr`.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MAX.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MAX.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="c1"># MAX strives to maximize (tpr - fpr), which is equivalent to minimizing (fpr - tpr)</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">fpr</span> <span class="o">-</span> <span class="n">tpr</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="X"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X">[docs]</a><span class="k">class</span> <span class="nc">X</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that looks</span>
|
||||
<span class="sd"> for the threshold that yields `tpr=1-fpr`.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="X.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.X.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="nb">abs</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="p">(</span><span class="n">tpr</span> <span class="o">+</span> <span class="n">fpr</span><span class="p">))</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MS"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS">[docs]</a><span class="k">class</span> <span class="nc">MS</span><span class="p">(</span><span class="n">ThresholdOptimization</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Median Sweep. Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates</span>
|
||||
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of them all.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MS.condition"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.condition">[docs]</a> <span class="k">def</span> <span class="nf">condition</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="mi">1</span></div>
|
||||
|
||||
<div class="viewcode-block" id="MS.aggregation_fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregation_fit">[docs]</a> <span class="k">def</span> <span class="nf">aggregation_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">classif_predictions</span><span class="o">.</span><span class="n">Xy</span>
|
||||
<span class="c1"># keeps all candidates</span>
|
||||
<span class="n">tprs_fprs_thresholds</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_eval_candidate_thresholds</span><span class="p">(</span><span class="n">decision_scores</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">tprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">0</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">fprs</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span> <span class="o">=</span> <span class="n">tprs_fprs_thresholds</span><span class="p">[:,</span> <span class="mi">2</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="MS.aggregate"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classif_predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">):</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">aggregate_with_threshold</span><span class="p">(</span><span class="n">classif_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">tprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fprs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">thresholds</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">prevalences</span><span class="o">.</span><span class="n">ndim</span><span class="o">==</span><span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevalences</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">prevalences</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MS2"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2">[docs]</a><span class="k">class</span> <span class="nc">MS2</span><span class="p">(</span><span class="n">MS</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Median Sweep 2. Threshold Optimization variant for :class:`ACC` as proposed by</span>
|
||||
<span class="sd"> `Forman 2006 <https://dl.acm.org/doi/abs/10.1145/1150402.1150423>`_ and</span>
|
||||
<span class="sd"> `Forman 2008 <https://link.springer.com/article/10.1007/s10618-008-0097-y>`_ that generates</span>
|
||||
<span class="sd"> class prevalence estimates for all decision thresholds and returns the median of the estimates for cases in</span>
|
||||
<span class="sd"> which `tpr-fpr>0.25`.</span>
|
||||
<span class="sd"> The goal is to bring improved stability to the denominator of the adjustment.</span>
|
||||
|
||||
<span class="sd"> :param classifier: a sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param val_split: indicates the proportion of data to be used as a stratified held-out validation set in which the</span>
|
||||
<span class="sd"> misclassification rates are to be estimated.</span>
|
||||
<span class="sd"> This parameter can be indicated as a real value (between 0 and 1), representing a proportion of</span>
|
||||
<span class="sd"> validation data, or as an integer, indicating that the misclassification rates should be estimated via</span>
|
||||
<span class="sd"> `k`-fold cross validation (this integer stands for the number of folds `k`, defaults 5), or as a</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">classifier</span><span class="p">:</span> <span class="n">BaseEstimator</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">5</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MS2.discard"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method._threshold_optim.MS2.discard">[docs]</a> <span class="k">def</span> <span class="nf">discard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tpr</span><span class="p">,</span> <span class="n">fpr</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">tpr</span><span class="o">-</span><span class="n">fpr</span><span class="p">)</span> <span class="o"><=</span> <span class="mf">0.25</span></div></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method.base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method.base</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method.base</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.base</span> <span class="kn">import</span> <span class="n">BaseEstimator</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
|
||||
<span class="c1"># Base Quantifier abstract class</span>
|
||||
<span class="c1"># ------------------------------------</span>
|
||||
<div class="viewcode-block" id="BaseQuantifier"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier">[docs]</a><span class="k">class</span> <span class="nc">BaseQuantifier</span><span class="p">(</span><span class="n">BaseEstimator</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract Quantifier. A quantifier is defined as an object of a class that implements the method :meth:`fit` on</span>
|
||||
<span class="sd"> :class:`quapy.data.base.LabelledCollection`, the method :meth:`quantify`, and the :meth:`set_params` and</span>
|
||||
<span class="sd"> :meth:`get_params` for model selection (see :meth:`quapy.model_selection.GridSearchQ`)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<div class="viewcode-block" id="BaseQuantifier.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.fit">[docs]</a> <span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Trains a quantifier.</span>
|
||||
|
||||
<span class="sd"> :param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
<div class="viewcode-block" id="BaseQuantifier.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BaseQuantifier.quantify">[docs]</a> <span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generate class prevalence estimates for the sample's instances</span>
|
||||
|
||||
<span class="sd"> :param instances: array-like</span>
|
||||
<span class="sd"> :return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="BinaryQuantifier"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.BinaryQuantifier">[docs]</a><span class="k">class</span> <span class="nc">BinaryQuantifier</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract class of binary quantifiers, i.e., quantifiers estimating class prevalence values for only two classes</span>
|
||||
<span class="sd"> (typically, to be interpreted as one class and its complement).</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_check_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">quantifier_name</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> works only on problems of binary classification. '</span> \
|
||||
<span class="sa">f</span><span class="s1">'Use the class OneVsAll to enable </span><span class="si">{</span><span class="n">quantifier_name</span><span class="si">}</span><span class="s1"> work on single-label data.'</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="OneVsAll"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAll">[docs]</a><span class="k">class</span> <span class="nc">OneVsAll</span><span class="p">:</span>
|
||||
<span class="k">pass</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="newOneVsAll"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.newOneVsAll">[docs]</a><span class="k">def</span> <span class="nf">newOneVsAll</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier'</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">OneVsAllGeneric</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="OneVsAllGeneric"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric">[docs]</a><span class="k">class</span> <span class="nc">OneVsAllGeneric</span><span class="p">(</span><span class="n">OneVsAll</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary</span>
|
||||
<span class="sd"> quantifier for each class, and then l1-normalizes the outputs so that the class prevelence values sum up to 1.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">BaseQuantifier</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">binary_quantifier</span><span class="si">}</span><span class="s1"> does not seem to be a Quantifier'</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">binary_quantifier</span><span class="p">,</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[warning] the quantifier seems to be an instance of qp.method.aggregative.AggregativeQuantifier; '</span>
|
||||
<span class="sa">f</span><span class="s1">'you might prefer instantiating </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">OneVsAllAggregative</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span> <span class="o">=</span> <span class="n">binary_quantifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="OneVsAllGeneric.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">fit_classifier</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> expect non-binary data'</span>
|
||||
<span class="k">assert</span> <span class="n">fit_classifier</span> <span class="o">==</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'fit_classifier must be True'</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span> <span class="o">=</span> <span class="p">{</span><span class="n">c</span><span class="p">:</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary_quantifier</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">classes_</span><span class="p">}</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_fit</span><span class="p">,</span> <span class="n">data</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
||||
<span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">'threading'</span><span class="p">)(</span>
|
||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">c</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">classes_</span>
|
||||
<span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="OneVsAllGeneric.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.base.OneVsAllGeneric.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">prevalences</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_parallel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_delayed_binary_predict</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">prevalences</span><span class="p">)</span></div>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">classes_</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">sorted</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_binary_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">X</span><span class="p">)[</span><span class="mi">1</span><span class="p">]</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_binary_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">c</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
|
||||
<span class="n">bindata</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">labels</span> <span class="o">==</span> <span class="n">c</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">])</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">dict_binary_quantifiers</span><span class="p">[</span><span class="n">c</span><span class="p">]</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">bindata</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,796 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method.meta — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method.meta</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method.meta</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.metrics</span> <span class="kn">import</span> <span class="n">f1_score</span><span class="p">,</span> <span class="n">make_scorer</span><span class="p">,</span> <span class="n">accuracy_score</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchCV</span><span class="p">,</span> <span class="n">cross_val_predict</span>
|
||||
<span class="kn">from</span> <span class="nn">tqdm</span> <span class="kn">import</span> <span class="n">tqdm</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">functional</span> <span class="k">as</span> <span class="n">F</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">CC</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="kn">from</span> <span class="nn">.</span> <span class="kn">import</span> <span class="n">_neural</span>
|
||||
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
|
||||
<span class="n">_neural</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="n">_neural</span><span class="p">:</span>
|
||||
<span class="n">QuaNet</span> <span class="o">=</span> <span class="n">_neural</span><span class="o">.</span><span class="n">QuaNetTrainer</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">QuaNet</span> <span class="o">=</span> <span class="s2">"QuaNet is not available due to missing torch package"</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator2"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2">[docs]</a><span class="k">class</span> <span class="nc">MedianEstimator2</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
|
||||
<span class="sd"> estimation returned by differently (hyper)parameterized base quantifiers.</span>
|
||||
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
|
||||
<span class="sd"> i.e., in cases of binary quantification.</span>
|
||||
|
||||
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
|
||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
||||
<span class="sd"> :param param_grid: the grid or parameters towards which the median will be computed</span>
|
||||
<span class="sd"> :param n_jobs: number of parllel workes</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator2.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator2.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator2.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
|
||||
|
||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator2.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator2.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator">[docs]</a><span class="k">class</span> <span class="nc">MedianEstimator</span><span class="p">(</span><span class="n">BinaryQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This method is a meta-quantifier that returns, as the estimated class prevalence values, the median of the</span>
|
||||
<span class="sd"> estimates returned by differently (hyper)parameterized base quantifiers.</span>
|
||||
<span class="sd"> The median of unit-vectors is only guaranteed to be a unit-vector for n=2 dimensions,</span>
|
||||
<span class="sd"> i.e., in cases of binary quantification.</span>
|
||||
|
||||
<span class="sd"> :param base_quantifier: the base, binary quantifier</span>
|
||||
<span class="sd"> :param random_state: a seed to be set before fitting any base quantifier (default None)</span>
|
||||
<span class="sd"> :param param_grid: the grid of parameters over which the median will be computed</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">base_quantifier</span><span class="p">:</span> <span class="n">BinaryQuantifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">get_params</span><span class="p">(</span><span class="n">deep</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="n">params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_fit_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="n">cls_params</span><span class="p">,</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_fit_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">),</span> <span class="n">q_params</span><span class="p">),</span> <span class="n">training</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_check_binary</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">models_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_classifier</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">cls_configs</span><span class="p">),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_configs</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="n">training</span><span class="p">,</span> <span class="n">predict_on</span><span class="o">=</span><span class="n">model</span><span class="o">.</span><span class="n">val_split</span><span class="p">)</span>
|
||||
<span class="n">models_preds</span> <span class="o">=</span> <span class="p">[(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">)]</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit_aggregation</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">setup</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">setup</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">models_preds</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">model_selection</span><span class="o">.</span><span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">models</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_fit</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">params</span><span class="p">,</span> <span class="n">training</span><span class="p">)</span> <span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="n">configs</span><span class="p">),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="k">return</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="MedianEstimator.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.MedianEstimator.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_delayed_predict</span><span class="p">,</span>
|
||||
<span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">models</span><span class="p">),</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span>
|
||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span>
|
||||
<span class="p">)</span>
|
||||
<span class="n">prev_preds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prev_preds</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Ensemble"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble">[docs]</a><span class="k">class</span> <span class="nc">Ensemble</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="n">VALID_POLICIES</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'ave'</span><span class="p">,</span> <span class="s1">'ptr'</span><span class="p">,</span> <span class="s1">'ds'</span><span class="p">}</span> <span class="o">|</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span>
|
||||
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implementation of the Ensemble methods for quantification described by </span>
|
||||
<span class="sd"> `Pérez-Gállego et al., 2017 <https://www.sciencedirect.com/science/article/pii/S1566253516300628>`_</span>
|
||||
<span class="sd"> and</span>
|
||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
<span class="sd"> The policies implemented include:</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> - Average (`policy='ave'`): computes class prevalence estimates as the average of the estimates </span>
|
||||
<span class="sd"> returned by the base quantifiers.</span>
|
||||
<span class="sd"> - Training Prevalence (`policy='ptr'`): applies a dynamic selection to the ensemble’s members by retaining only </span>
|
||||
<span class="sd"> those members such that the class prevalence values in the samples they use as training set are closest to </span>
|
||||
<span class="sd"> preliminary class prevalence estimates computed as the average of the estimates of all the members. The final </span>
|
||||
<span class="sd"> estimate is recomputed by considering only the selected members.</span>
|
||||
<span class="sd"> - Distribution Similarity (`policy='ds'`): performs a dynamic selection of base members by retaining</span>
|
||||
<span class="sd"> the members trained on samples whose distribution of posterior probabilities is closest, in terms of the</span>
|
||||
<span class="sd"> Hellinger Distance, to the distribution of posterior probabilities in the test sample</span>
|
||||
<span class="sd"> - Accuracy (`policy='<valid error name>'`): performs a static selection of the ensemble members by</span>
|
||||
<span class="sd"> retaining those that minimize a quantification error measure, which is passed as an argument.</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> Example:</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> >>> model = Ensemble(quantifier=ACC(LogisticRegression()), size=30, policy='ave', n_jobs=-1)</span>
|
||||
<span class="sd"> </span>
|
||||
<span class="sd"> :param quantifier: base quantification member of the ensemble </span>
|
||||
<span class="sd"> :param size: number of members</span>
|
||||
<span class="sd"> :param red_size: number of members to retain after selection (depending on the policy)</span>
|
||||
<span class="sd"> :param min_pos: minimum number of positive instances to consider a sample as valid </span>
|
||||
<span class="sd"> :param policy: the selection policy; available policies include: `ave` (default), `ptr`, `ds`, and accuracy </span>
|
||||
<span class="sd"> (which is instantiated via a valid error name, e.g., `mae`)</span>
|
||||
<span class="sd"> :param max_sample_size: maximum number of instances to consider in the samples (set to None </span>
|
||||
<span class="sd"> to indicate no limit, default)</span>
|
||||
<span class="sd"> :param val_split: a float in range (0,1) indicating the proportion of data to be used as a stratified held-out</span>
|
||||
<span class="sd"> validation split, or a :class:`quapy.data.base.LabelledCollection` (the split itself).</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers (default 1)</span>
|
||||
<span class="sd"> :param verbose: set to True (default is False) to get some information in standard output</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">size</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span>
|
||||
<span class="n">red_size</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span>
|
||||
<span class="n">min_pos</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
|
||||
<span class="n">policy</span><span class="o">=</span><span class="s1">'ave'</span><span class="p">,</span>
|
||||
<span class="n">max_sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">val_split</span><span class="p">:</span><span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="n">policy</span> <span class="ow">in</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'unknown policy=</span><span class="si">{</span><span class="n">policy</span><span class="si">}</span><span class="s1">; valid are </span><span class="si">{</span><span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">assert</span> <span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_sample_size</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> \
|
||||
<span class="s1">'wrong value for max_sample_size; set it to a positive number or None'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">quantifier</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">=</span> <span class="n">min_pos</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">red_size</span> <span class="o">=</span> <span class="n">red_size</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">=</span> <span class="n">policy</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">val_split</span> <span class="o">=</span> <span class="n">val_split</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="o">=</span> <span class="n">max_sample_size</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'[Ensemble]'</span> <span class="o">+</span> <span class="n">msg</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="Ensemble.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">val_split</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">data</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'ds policy is only defined for binary quantification, but this dataset is not binary'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">val_split</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">val_split</span>
|
||||
|
||||
<span class="c1"># randomly chooses the prevalences for each member of the ensemble (preventing classes with less than</span>
|
||||
<span class="c1"># min_pos positive examples)</span>
|
||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="nb">min</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max_sample_size</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">min_val</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">min_pos</span> <span class="o">/</span> <span class="n">sample_size</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)]</span>
|
||||
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span><span class="p">:</span>
|
||||
<span class="c1"># precompute the training posterior probabilities</span>
|
||||
<span class="n">posteriors</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy_get_posteriors</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||||
|
||||
<span class="n">is_static_policy</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">)</span>
|
||||
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="p">(</span>
|
||||
<span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">is_static_policy</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">prevs</span>
|
||||
<span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="n">_delayed_new_instance</span><span class="p">,</span>
|
||||
<span class="n">tqdm</span><span class="p">(</span><span class="n">args</span><span class="p">,</span> <span class="n">desc</span><span class="o">=</span><span class="s1">'fitting ensamble'</span><span class="p">,</span> <span class="n">total</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="k">else</span> <span class="n">args</span><span class="p">,</span>
|
||||
<span class="n">asarray</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># static selection policy (the name of a quantification-oriented error function to minimize)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_accuracy_policy</span><span class="p">(</span><span class="n">error_name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">policy</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="s1">'Fit [Done]'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Ensemble.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span><span class="n">_delayed_quantify</span><span class="p">,</span> <span class="p">((</span><span class="n">Qi</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span> <span class="k">for</span> <span class="n">Qi</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ptr'</span><span class="p">:</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ptr_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">policy</span> <span class="o">==</span> <span class="s1">'ds'</span><span class="p">:</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ds_policy</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">normalize_prevalence</span><span class="p">(</span><span class="n">predictions</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Ensemble.set_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
|
||||
<span class="sd"> with the abstract class).</span>
|
||||
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
|
||||
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
|
||||
<span class="sd"> classification (not recommended).</span>
|
||||
|
||||
<span class="sd"> :param parameters: dictionary</span>
|
||||
<span class="sd"> :return: raises an Exception</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> should not be used within GridSearchQ; '</span>
|
||||
<span class="sa">f</span><span class="s1">'instead, use Ensemble(GridSearchQ(q),...), with q a Quantifier (recommended), '</span>
|
||||
<span class="sa">f</span><span class="s1">'or Ensemble(Q(GridSearchCV(l))) with Q a quantifier class that has a classifier '</span>
|
||||
<span class="sa">f</span><span class="s1">'l optimized for classification (not recommended).'</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="Ensemble.get_params"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.Ensemble.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This function should not be used within :class:`quapy.model_selection.GridSearchQ` (is here for compatibility</span>
|
||||
<span class="sd"> with the abstract class).</span>
|
||||
<span class="sd"> Instead, use `Ensemble(GridSearchQ(q),...)`, with `q` a Quantifier (recommended), or</span>
|
||||
<span class="sd"> `Ensemble(Q(GridSearchCV(l)))` with `Q` a quantifier class that has a classifier `l` optimized for</span>
|
||||
<span class="sd"> classification (not recommended).</span>
|
||||
|
||||
<span class="sd"> :param deep: for compatibility with scikit-learn</span>
|
||||
<span class="sd"> :return: raises an Exception</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_accuracy_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error_name</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Selects the red_size best performant quantifiers in a static way (i.e., dropping all non-selected instances).</span>
|
||||
<span class="sd"> For each model in the ensemble, the performance is measured in terms of _error_name_ on the quantification of</span>
|
||||
<span class="sd"> the samples used for training the rest of the models in the ensemble.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.evaluation</span> <span class="kn">import</span> <span class="n">evaluate_on_samples</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error_name</span><span class="p">)</span>
|
||||
<span class="n">tests</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">model</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">):</span>
|
||||
<span class="n">scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">evaluate_on_samples</span><span class="p">(</span><span class="n">model</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">tests</span><span class="p">[:</span><span class="n">i</span><span class="p">]</span> <span class="o">+</span> <span class="n">tests</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">:],</span> <span class="n">error</span><span class="p">))</span>
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">scores</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span> <span class="o">=</span> <span class="n">_select_k</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_ptr_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Selects the predictions made by models that have been trained on samples with a prevalence that is most similar</span>
|
||||
<span class="sd"> to a first approximation of the test prevalence as made by all models in the ensemble.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">test_prev_estim</span> <span class="o">=</span> <span class="n">predictions</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">tr_prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
||||
<span class="n">ptr_differences</span> <span class="o">=</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mse</span><span class="p">(</span><span class="n">ptr_i</span><span class="p">,</span> <span class="n">test_prev_estim</span><span class="p">)</span> <span class="k">for</span> <span class="n">ptr_i</span> <span class="ow">in</span> <span class="n">tr_prevs</span><span class="p">]</span>
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">ptr_differences</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_ds_policy_get_posteriors</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> In the original article, there are some aspects regarding this method that are not mentioned. The paper says</span>
|
||||
<span class="sd"> that the distribution of posterior probabilities from training and test examples is compared by means of the</span>
|
||||
<span class="sd"> Hellinger Distance. However, how these posterior probabilities are generated is not specified. In the article,</span>
|
||||
<span class="sd"> a Logistic Regressor (LR) is used as the classifier device and that could be used for this purpose. However, in</span>
|
||||
<span class="sd"> general, a Quantifier is not necessarily an instance of Aggreggative Probabilistic Quantifiers, and so, that the</span>
|
||||
<span class="sd"> quantifier builds on top of a probabilistic classifier cannot be given for granted. Additionally, it would not</span>
|
||||
<span class="sd"> be correct to generate the posterior probabilities for training instances that have concurred in training the</span>
|
||||
<span class="sd"> classifier that generates them.</span>
|
||||
|
||||
<span class="sd"> This function thus generates the posterior probabilities for all training documents in a cross-validation way,</span>
|
||||
<span class="sd"> using LR with hyperparameters that have previously been optimized via grid search in 5FCV.</span>
|
||||
|
||||
<span class="sd"> :param data: a LabelledCollection</span>
|
||||
<span class="sd"> :return: (P,f,) where P is an ndarray containing the posterior probabilities of the training data, generated via</span>
|
||||
<span class="sd"> cross-validation and using an optimized LR, and the function to be used in order to generate posterior</span>
|
||||
<span class="sd"> probabilities for test instances.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
|
||||
<span class="n">lr_base</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">class_weight</span><span class="o">=</span><span class="s1">'balanced'</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">1000</span><span class="p">)</span>
|
||||
|
||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">9</span><span class="p">)}</span>
|
||||
<span class="n">optim</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">lr_base</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">posteriors</span> <span class="o">=</span> <span class="n">cross_val_predict</span><span class="p">(</span><span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">cv</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="s1">'predict_proba'</span><span class="p">)</span>
|
||||
<span class="n">posteriors_generator</span> <span class="o">=</span> <span class="n">optim</span><span class="o">.</span><span class="n">best_estimator_</span><span class="o">.</span><span class="n">predict_proba</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">posteriors_generator</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_ds_policy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
|
||||
<span class="n">test_posteriors</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">post_proba_fn</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
|
||||
<span class="n">test_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">test_posteriors</span><span class="p">)</span>
|
||||
<span class="n">tr_distributions</span> <span class="o">=</span> <span class="p">[</span><span class="n">m</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">ensemble</span><span class="p">]</span>
|
||||
<span class="n">dist</span> <span class="o">=</span> <span class="p">[</span><span class="n">F</span><span class="o">.</span><span class="n">HellingerDistance</span><span class="p">(</span><span class="n">tr_dist_i</span><span class="p">,</span> <span class="n">test_distribution</span><span class="p">)</span> <span class="k">for</span> <span class="n">tr_dist_i</span> <span class="ow">in</span> <span class="n">tr_distributions</span><span class="p">]</span>
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">dist</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">_select_k</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">red_size</span><span class="p">)</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Indicates that the quantifier is not aggregative.</span>
|
||||
|
||||
<span class="sd"> :return: False</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Indicates that the quantifier is not probabilistic.</span>
|
||||
|
||||
<span class="sd"> :return: False</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="kc">False</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_probability_distribution"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.get_probability_distribution">[docs]</a><span class="k">def</span> <span class="nf">get_probability_distribution</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">8</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Gets a histogram out of the posterior probabilities (only for the binary case).</span>
|
||||
|
||||
<span class="sd"> :param posterior_probabilities: array-like of shape `(n_instances, 2,)`</span>
|
||||
<span class="sd"> :param bins: integer</span>
|
||||
<span class="sd"> :return: `np.ndarray` with the relative frequencies for each bin (for the positive class only)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">posterior_probabilities</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">'the posterior probabilities do not seem to be for a binary problem'</span>
|
||||
<span class="n">posterior_probabilities</span> <span class="o">=</span> <span class="n">posterior_probabilities</span><span class="p">[:,</span> <span class="mi">1</span><span class="p">]</span> <span class="c1"># take the positive posteriors only</span>
|
||||
<span class="n">distribution</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">posterior_probabilities</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">density</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">distribution</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_select_k</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">order</span><span class="p">,</span> <span class="n">k</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="p">[</span><span class="n">elements</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span> <span class="k">for</span> <span class="n">idx</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[:</span><span class="n">k</span><span class="p">]]</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_new_instance</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">base_quantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">val_split</span><span class="p">,</span> <span class="n">prev</span><span class="p">,</span> <span class="n">posteriors</span><span class="p">,</span> <span class="n">keep_samples</span><span class="p">,</span> <span class="n">verbose</span><span class="p">,</span> <span class="n">sample_size</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">fit-start for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">, sample_size=</span><span class="si">{</span><span class="n">sample_size</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">val_split</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
|
||||
<span class="k">assert</span> <span class="mi">0</span> <span class="o"><</span> <span class="n">val_split</span> <span class="o"><</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'val_split should be in (0,1)'</span>
|
||||
<span class="n">data</span><span class="p">,</span> <span class="n">val_split</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mi">1</span> <span class="o">-</span> <span class="n">val_split</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample_index</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prev</span><span class="p">)</span>
|
||||
<span class="n">sample</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">sample_index</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">val_split</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="n">val_split</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">sample</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tr_prevalence</span> <span class="o">=</span> <span class="n">sample</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="n">tr_distribution</span> <span class="o">=</span> <span class="n">get_probability_distribution</span><span class="p">(</span><span class="n">posteriors</span><span class="p">[</span><span class="n">sample_index</span><span class="p">])</span> <span class="k">if</span> <span class="p">(</span><span class="n">posteriors</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="k">else</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">\--fit-ended for prev </span><span class="si">{</span><span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">tr_prevalence</span><span class="p">,</span> <span class="n">tr_distribution</span><span class="p">,</span> <span class="n">sample</span> <span class="k">if</span> <span class="n">keep_samples</span> <span class="k">else</span> <span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_delayed_quantify</span><span class="p">(</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">quantifier</span><span class="p">,</span> <span class="n">instances</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="k">return</span> <span class="n">quantifier</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_draw_simplex</span><span class="p">(</span><span class="n">ndim</span><span class="p">,</span> <span class="n">min_val</span><span class="p">,</span> <span class="n">max_trials</span><span class="o">=</span><span class="mi">100</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a uniform sampling from the ndim-dimensional simplex but guarantees that all dimensions</span>
|
||||
<span class="sd"> are >= min_val (for min_val>0, this makes the sampling not truly uniform)</span>
|
||||
|
||||
<span class="sd"> :param ndim: number of dimensions of the simplex</span>
|
||||
<span class="sd"> :param min_val: minimum class prevalence allowed. If greater than or equal to 1/ndim, a ValueError will be thrown since</span>
|
||||
<span class="sd"> there is no possible solution.</span>
|
||||
<span class="sd"> :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex</span>
|
||||
<span class="sd"> and R is the simplex subset containing dimensions lower than min_val</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">min_val</span> <span class="o">>=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">ndim</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'no sample can be draw from the </span><span class="si">{</span><span class="n">ndim</span><span class="si">}</span><span class="s1">-dimensional simplex so that '</span>
|
||||
<span class="sa">f</span><span class="s1">'all its values are >=</span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> (try with a larger value for min_pos)'</span><span class="p">)</span>
|
||||
<span class="n">trials</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">while</span> <span class="kc">True</span><span class="p">:</span>
|
||||
<span class="n">u</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">ndim</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">all</span><span class="p">(</span><span class="n">u</span> <span class="o">>=</span> <span class="n">min_val</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">u</span>
|
||||
<span class="n">trials</span> <span class="o">+=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="n">trials</span> <span class="o">>=</span> <span class="n">max_trials</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'it looks like finding a random simplex with all its dimensions being'</span>
|
||||
<span class="sa">f</span><span class="s1">'>= </span><span class="si">{</span><span class="n">min_val</span><span class="si">}</span><span class="s1"> is unlikely (it failed after </span><span class="si">{</span><span class="n">max_trials</span><span class="si">}</span><span class="s1"> trials)'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">optim</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">f1e</span><span class="p">:</span>
|
||||
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">f1_score</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="n">optim</span> <span class="o">==</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">acce</span><span class="p">:</span>
|
||||
<span class="n">scoring</span> <span class="o">=</span> <span class="n">make_scorer</span><span class="p">(</span><span class="n">accuracy_score</span><span class="p">)</span>
|
||||
<span class="n">classifier</span> <span class="o">=</span> <span class="n">GridSearchCV</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">scoring</span><span class="o">=</span><span class="n">scoring</span><span class="p">)</span>
|
||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">base_quantifier</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">base_quantifier_class</span><span class="p">(</span><span class="n">classifier</span><span class="p">),</span>
|
||||
<span class="n">param_grid</span><span class="o">=</span><span class="n">param_grid</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="n">param_model_sel</span><span class="p">,</span>
|
||||
<span class="n">error</span><span class="o">=</span><span class="n">optim</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_check_error</span><span class="p">(</span><span class="n">error</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span> <span class="ow">or</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">CLASSIFICATION_ERROR</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">error</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">'</span>
|
||||
<span class="sa">f</span><span class="s1">'the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ERROR_NAMES</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ensembleFactory"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ensembleFactory">[docs]</a><span class="k">def</span> <span class="nf">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">:</span> <span class="nb">dict</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
||||
<span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Ensemble factory. Provides a unified interface for instantiating ensembles that can be optimized (via model</span>
|
||||
<span class="sd"> selection for quantification) for a given evaluation metric using :class:`quapy.model_selection.GridSearchQ`.</span>
|
||||
<span class="sd"> If the evaluation metric is classification-oriented</span>
|
||||
<span class="sd"> (instead of quantification-oriented), then the optimization will be carried out via sklearn's</span>
|
||||
<span class="sd"> `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_.</span>
|
||||
|
||||
<span class="sd"> Example to instantiate an :class:`Ensemble` based on :class:`quapy.method.aggregative.PACC`</span>
|
||||
<span class="sd"> in which the base members are optimized for :meth:`quapy.error.mae` via</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`. The ensemble follows the policy `Accuracy` based</span>
|
||||
<span class="sd"> on :meth:`quapy.error.mae` (the same measure being optimized),</span>
|
||||
<span class="sd"> meaning that a static selection of members of the ensemble is made based on their performance</span>
|
||||
<span class="sd"> in terms of this error.</span>
|
||||
|
||||
<span class="sd"> >>> param_grid = {</span>
|
||||
<span class="sd"> >>> 'C': np.logspace(-3,3,7),</span>
|
||||
<span class="sd"> >>> 'class_weight': ['balanced', None]</span>
|
||||
<span class="sd"> >>> }</span>
|
||||
<span class="sd"> >>> param_mod_sel = {</span>
|
||||
<span class="sd"> >>> 'sample_size': 500,</span>
|
||||
<span class="sd"> >>> 'protocol': 'app'</span>
|
||||
<span class="sd"> >>> }</span>
|
||||
<span class="sd"> >>> common={</span>
|
||||
<span class="sd"> >>> 'max_sample_size': 1000,</span>
|
||||
<span class="sd"> >>> 'n_jobs': -1,</span>
|
||||
<span class="sd"> >>> 'param_grid': param_grid,</span>
|
||||
<span class="sd"> >>> 'param_model_sel': param_mod_sel,</span>
|
||||
<span class="sd"> >>> }</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> ensembleFactory(LogisticRegression(), PACC, optim='mae', policy='mae', **common)</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param base_quantifier_class: a class of quantifiers</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_model_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">optim</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">param_grid</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param_grid is None but optim was requested.'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">param_model_sel</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'param_model_sel is None but optim was requested.'</span><span class="p">)</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">_check_error</span><span class="p">(</span><span class="n">optim</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">_instantiate_ensemble</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">base_quantifier_class</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">error</span><span class="p">,</span> <span class="n">param_model_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ECC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.ECC">[docs]</a><span class="k">def</span> <span class="nf">ECC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.CC` quantifiers, as used by</span>
|
||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
|
||||
<span class="sd"> Equivalent to:</span>
|
||||
|
||||
<span class="sd"> >>> ensembleFactory(classifier, CC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
||||
|
||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">CC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EACC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EACC">[docs]</a><span class="k">def</span> <span class="nf">EACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.ACC` quantifiers, as used by</span>
|
||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
|
||||
<span class="sd"> Equivalent to:</span>
|
||||
|
||||
<span class="sd"> >>> ensembleFactory(classifier, ACC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
||||
|
||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">ACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EPACC"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EPACC">[docs]</a><span class="k">def</span> <span class="nf">EPACC</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.PACC` quantifiers.</span>
|
||||
|
||||
<span class="sd"> Equivalent to:</span>
|
||||
|
||||
<span class="sd"> >>> ensembleFactory(classifier, PACC, param_grid, optim, param_mod_sel, **kwargs)</span>
|
||||
|
||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">PACC</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EHDy"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EHDy">[docs]</a><span class="k">def</span> <span class="nf">EHDy</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.HDy` quantifiers, as used by</span>
|
||||
<span class="sd"> `Pérez-Gállego et al., 2019 <https://www.sciencedirect.com/science/article/pii/S1566253517303652>`_.</span>
|
||||
|
||||
<span class="sd"> Equivalent to:</span>
|
||||
|
||||
<span class="sd"> >>> ensembleFactory(classifier, HDy, param_grid, optim, param_mod_sel, **kwargs)</span>
|
||||
|
||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">HDy</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EEMQ"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.meta.EEMQ">[docs]</a><span class="k">def</span> <span class="nf">EEMQ</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">optim</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements an ensemble of :class:`quapy.method.aggregative.EMQ` quantifiers.</span>
|
||||
|
||||
<span class="sd"> Equivalent to:</span>
|
||||
|
||||
<span class="sd"> >>> ensembleFactory(classifier, EMQ, param_grid, optim, param_mod_sel, **kwargs)</span>
|
||||
|
||||
<span class="sd"> See :meth:`ensembleFactory` for further details.</span>
|
||||
|
||||
<span class="sd"> :param classifier: sklearn's Estimator that generates a classifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with the grid of parameters to optimize for</span>
|
||||
<span class="sd"> :param optim: a valid quantification or classification error, or a string name of it</span>
|
||||
<span class="sd"> :param param_mod_sel: a dictionary containing any keyworded argument to pass to</span>
|
||||
<span class="sd"> :class:`quapy.model_selection.GridSearchQ`</span>
|
||||
<span class="sd"> :param kwargs: kwargs for the class :class:`Ensemble`</span>
|
||||
<span class="sd"> :return: an instance of :class:`Ensemble`</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">ensembleFactory</span><span class="p">(</span><span class="n">classifier</span><span class="p">,</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">optim</span><span class="p">,</span> <span class="n">param_mod_sel</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,266 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.method.non_aggregative — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||||
<script src="../../../_static/jquery.js"></script>
|
||||
<script src="../../../_static/underscore.js"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../../_static/doctools.js"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.method.non_aggregative</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.method.non_aggregative</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">get_divergence</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">BinaryQuantifier</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation">[docs]</a><span class="k">class</span> <span class="nc">MaximumLikelihoodPrevalenceEstimation</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior</span>
|
||||
<span class="sd"> probability shift between training and test instances (put another way, that the i.i.d. assumption holds).</span>
|
||||
<span class="sd"> The estimation of class prevalence values for any test sample is always (i.e., irrespective of the test sample</span>
|
||||
<span class="sd"> itself) the class prevalence seen during training. This method is considered to be a lower-bound quantifier that</span>
|
||||
<span class="sd"> any quantification method should beat.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_classes_</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Computes the training prevalence and stores it.</span>
|
||||
|
||||
<span class="sd"> :param data: the training sample</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="MaximumLikelihoodPrevalenceEstimation.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.MaximumLikelihoodPrevalenceEstimation.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Ignores the input instances and returns, as the class prevalence estimates, the training prevalence.</span>
|
||||
|
||||
<span class="sd"> :param instances: array-like (ignored)</span>
|
||||
<span class="sd"> :return: the class prevalence seen during training</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">estimated_prevalence</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="DMx"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx">[docs]</a><span class="k">class</span> <span class="nc">DMx</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generic Distribution Matching quantifier for binary or multiclass quantification based on the space of covariates.</span>
|
||||
<span class="sd"> This implementation takes the number of bins, the divergence, and the possibility to work on CDF as hyperparameters.</span>
|
||||
|
||||
<span class="sd"> :param nbins: number of bins used to discretize the distributions (default 8)</span>
|
||||
<span class="sd"> :param divergence: a string representing a divergence measure (currently, "HD" and "topsoe" are implemented)</span>
|
||||
<span class="sd"> or a callable function taking two ndarrays of the same dimension as input (default "HD", meaning Hellinger</span>
|
||||
<span class="sd"> Distance)</span>
|
||||
<span class="sd"> :param cdf: whether to use CDF instead of PDF (default False)</span>
|
||||
<span class="sd"> :param n_jobs: number of parallel workers (default None)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">divergence</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Callable</span><span class="p">]</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">'optim_minimize'</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">nbins</span> <span class="o">=</span> <span class="n">nbins</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">divergence</span> <span class="o">=</span> <span class="n">divergence</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">cdf</span> <span class="o">=</span> <span class="n">cdf</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">search</span> <span class="o">=</span> <span class="n">search</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">n_jobs</span>
|
||||
|
||||
<div class="viewcode-block" id="DMx.HDx"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.HDx">[docs]</a> <span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">HDx</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> `Hellinger Distance x <https://www.sciencedirect.com/science/article/pii/S0020025512004069>`_ (HDx).</span>
|
||||
<span class="sd"> HDx is a method for training binary quantifiers, that models quantification as the problem of</span>
|
||||
<span class="sd"> minimizing the average divergence (in terms of the Hellinger Distance) across the feature-specific normalized</span>
|
||||
<span class="sd"> histograms of two representations, one for the unlabelled examples, and another generated from the training</span>
|
||||
<span class="sd"> examples as a mixture model of the class-specific representations. The parameters of the mixture thus represent</span>
|
||||
<span class="sd"> the estimates of the class prevalence values.</span>
|
||||
|
||||
<span class="sd"> The method computes all matchings for nbins in [10, 20, ..., 110] and reports the mean of the median.</span>
|
||||
<span class="sd"> The best prevalence is searched via linear search, from 0 to 1 stepping by 0.01.</span>
|
||||
|
||||
<span class="sd"> :param n_jobs: number of parallel workers</span>
|
||||
<span class="sd"> :return: an instance of this class setup to mimic the performance of the HDx as originally proposed by</span>
|
||||
<span class="sd"> González-Castro, Alaiz-Rodríguez, Alegre (2013)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
|
||||
|
||||
<span class="n">dmx</span> <span class="o">=</span> <span class="n">DMx</span><span class="p">(</span><span class="n">divergence</span><span class="o">=</span><span class="s1">'HD'</span><span class="p">,</span> <span class="n">cdf</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">search</span><span class="o">=</span><span class="s1">'linear_search'</span><span class="p">)</span>
|
||||
<span class="n">nbins</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">110</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">int</span><span class="p">)}</span>
|
||||
<span class="n">hdx</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">base_quantifier</span><span class="o">=</span><span class="n">dmx</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">nbins</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">hdx</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__get_distributions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
|
||||
<span class="n">histograms</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">feat_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">):</span>
|
||||
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span> <span class="n">feat_idx</span><span class="p">]</span>
|
||||
<span class="n">feat_range</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span><span class="p">[</span><span class="n">feat_idx</span><span class="p">]</span>
|
||||
<span class="n">hist</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">feature</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">nbins</span><span class="p">,</span> <span class="nb">range</span><span class="o">=</span><span class="n">feat_range</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">norm_hist</span> <span class="o">=</span> <span class="n">hist</span> <span class="o">/</span> <span class="n">hist</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="n">histograms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">norm_hist</span><span class="p">)</span>
|
||||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">vstack</span><span class="p">(</span><span class="n">histograms</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">cdf</span><span class="p">:</span>
|
||||
<span class="n">distributions</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">distributions</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">distributions</span>
|
||||
|
||||
<div class="viewcode-block" id="DMx.fit"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates the validation distributions out of the training data (covariates).</span>
|
||||
<span class="sd"> The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats`</span>
|
||||
<span class="sd"> the number of features, and `nbins` the number of bins.</span>
|
||||
<span class="sd"> In particular, let `V` be the validation distributions; then `di=V[i]` are the distributions obtained from</span>
|
||||
<span class="sd"> training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in</span>
|
||||
<span class="sd"> training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin.</span>
|
||||
|
||||
<span class="sd"> :param data: the training set</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">X</span><span class="p">,</span> <span class="n">y</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">Xy</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">feat_ranges</span> <span class="o">=</span> <span class="n">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span>
|
||||
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">X</span><span class="p">[</span><span class="n">y</span><span class="o">==</span><span class="n">cat</span><span class="p">])</span> <span class="k">for</span> <span class="n">cat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)]</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="DMx.quantify"><a class="viewcode-back" href="../../../quapy.method.html#quapy.method.non_aggregative.DMx.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Searches for the mixture model parameter (the sought prevalence values) that yields a validation distribution</span>
|
||||
<span class="sd"> (the mixture) that best matches the test distribution, in terms of the divergence measure of choice.</span>
|
||||
<span class="sd"> The matching is computed as the average dissimilarity (in terms of the dissimilarity measure of choice)</span>
|
||||
<span class="sd"> between all feature-specific discrete distributions.</span>
|
||||
|
||||
<span class="sd"> :param instances: instances in the sample</span>
|
||||
<span class="sd"> :return: a vector of class prevalence estimates</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">assert</span> <span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="p">,</span> <span class="sa">f</span><span class="s1">'wrong shape; expected </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">nfeats</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="n">instances</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="si">}</span><span class="s1">'</span>
|
||||
|
||||
<span class="n">test_distribution</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__get_distributions</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">divergence</span> <span class="o">=</span> <span class="n">get_divergence</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">divergence</span><span class="p">)</span>
|
||||
<span class="n">n_classes</span><span class="p">,</span> <span class="n">n_feats</span><span class="p">,</span> <span class="n">nbins</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">shape</span>
|
||||
<span class="k">def</span> <span class="nf">loss</span><span class="p">(</span><span class="n">prev</span><span class="p">):</span>
|
||||
<span class="n">prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">expand_dims</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">mixture_distribution</span> <span class="o">=</span> <span class="p">(</span><span class="n">prev</span> <span class="o">@</span> <span class="bp">self</span><span class="o">.</span><span class="n">validation_distribution</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_classes</span><span class="p">,</span><span class="o">-</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="n">n_feats</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">divs</span> <span class="o">=</span> <span class="p">[</span><span class="n">divergence</span><span class="p">(</span><span class="n">test_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">],</span> <span class="n">mixture_distribution</span><span class="p">[</span><span class="n">feat</span><span class="p">])</span> <span class="k">for</span> <span class="n">feat</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_feats</span><span class="p">)]</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">divs</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">argmin_prevalence</span><span class="p">(</span><span class="n">loss</span><span class="p">,</span> <span class="n">n_classes</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">search</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_features_range</span><span class="p">(</span><span class="n">X</span><span class="p">):</span>
|
||||
<span class="n">feat_ranges</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">ncols</span> <span class="o">=</span> <span class="n">X</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">col_idx</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">ncols</span><span class="p">):</span>
|
||||
<span class="n">feature</span> <span class="o">=</span> <span class="n">X</span><span class="p">[:,</span><span class="n">col_idx</span><span class="p">]</span>
|
||||
<span class="n">feat_ranges</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">np</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">feature</span><span class="p">),</span> <span class="n">np</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">feature</span><span class="p">)))</span>
|
||||
<span class="k">return</span> <span class="n">feat_ranges</span>
|
||||
|
||||
|
||||
<span class="c1">#---------------------------------------------------------------</span>
|
||||
<span class="c1"># aliases</span>
|
||||
<span class="c1">#---------------------------------------------------------------</span>
|
||||
|
||||
<span class="n">DistributionMatchingX</span> <span class="o">=</span> <span class="n">DMx</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,516 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.model_selection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.model_selection</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.model_selection</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">import</span> <span class="nn">signal</span>
|
||||
<span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
||||
<span class="kn">from</span> <span class="nn">enum</span> <span class="kn">import</span> <span class="n">Enum</span>
|
||||
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Union</span><span class="p">,</span> <span class="n">Callable</span>
|
||||
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">wraps</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn</span> <span class="kn">import</span> <span class="n">clone</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy</span> <span class="kn">import</span> <span class="n">evaluation</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">AbstractProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.base</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.util</span> <span class="kn">import</span> <span class="n">timeout</span>
|
||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="Status"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.Status">[docs]</a><span class="k">class</span> <span class="nc">Status</span><span class="p">(</span><span class="n">Enum</span><span class="p">):</span>
|
||||
<span class="n">SUCCESS</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="n">TIMEOUT</span> <span class="o">=</span> <span class="mi">2</span>
|
||||
<span class="n">INVALID</span> <span class="o">=</span> <span class="mi">3</span>
|
||||
<span class="n">ERROR</span> <span class="o">=</span> <span class="mi">4</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ConfigStatus"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus">[docs]</a><span class="k">class</span> <span class="nc">ConfigStatus</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">msg</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">params</span> <span class="o">=</span> <span class="n">params</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">=</span> <span class="n">status</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">msg</span> <span class="o">=</span> <span class="n">msg</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="sa">f</span><span class="s1">':params:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">params</span><span class="si">}</span><span class="s1"> :status:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">status</span><span class="si">}</span><span class="s1"> '</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">msg</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="ConfigStatus.success"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.success">[docs]</a> <span class="k">def</span> <span class="nf">success</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">==</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div>
|
||||
|
||||
<div class="viewcode-block" id="ConfigStatus.failed"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.ConfigStatus.failed">[docs]</a> <span class="k">def</span> <span class="nf">failed</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">status</span> <span class="o">!=</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ">[docs]</a><span class="k">class</span> <span class="nc">GridSearchQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Grid Search optimization targeting a quantification-oriented metric.</span>
|
||||
|
||||
<span class="sd"> Optimizes the hyperparameters of a quantification method, based on an evaluation method and on an evaluation</span>
|
||||
<span class="sd"> protocol for quantification.</span>
|
||||
|
||||
<span class="sd"> :param model: the quantifier to optimize</span>
|
||||
<span class="sd"> :type model: BaseQuantifier</span>
|
||||
<span class="sd"> :param param_grid: a dictionary with keys the parameter names and values the list of values to explore</span>
|
||||
<span class="sd"> :param protocol: a sample generation protocol, an instance of :class:`quapy.protocol.AbstractProtocol`</span>
|
||||
<span class="sd"> :param error: an error function (callable) or a string indicating the name of an error function (valid ones</span>
|
||||
<span class="sd"> are those in :class:`quapy.error.QUANTIFICATION_ERROR`</span>
|
||||
<span class="sd"> :param refit: whether to refit the model on the whole labelled collection (training+validation) with</span>
|
||||
<span class="sd"> the best chosen hyperparameter combination. Ignored if protocol='gen'</span>
|
||||
<span class="sd"> :param timeout: establishes a timer (in seconds) for each of the hyperparameters configurations being tested.</span>
|
||||
<span class="sd"> Whenever a run takes longer than this timer, that configuration will be ignored. If all configurations end up</span>
|
||||
<span class="sd"> being ignored, a TimeoutError exception is raised. If -1 (default) then no time bound is set.</span>
|
||||
<span class="sd"> :param raise_errors: boolean, if True then raises an exception when a param combination yields any error, if</span>
|
||||
<span class="sd"> otherwise is False (default), then the combination is marked with an error status, but the process goes on.</span>
|
||||
<span class="sd"> However, if no configuration yields a valid model, then a ValueError exception will be raised.</span>
|
||||
<span class="sd"> :param verbose: set to True to get information through the stdout</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">model</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span>
|
||||
<span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">,</span>
|
||||
<span class="n">protocol</span><span class="p">:</span> <span class="n">AbstractProtocol</span><span class="p">,</span>
|
||||
<span class="n">error</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Callable</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">,</span>
|
||||
<span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">raise_errors</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||||
<span class="n">verbose</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">model</span> <span class="o">=</span> <span class="n">model</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">param_grid</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">protocol</span> <span class="o">=</span> <span class="n">protocol</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="o">=</span> <span class="n">refit</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_njobs</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span> <span class="o">=</span> <span class="n">raise_errors</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">verbose</span> <span class="o">=</span> <span class="n">verbose</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">__check_error</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">protocol</span><span class="p">,</span> <span class="n">AbstractProtocol</span><span class="p">),</span> <span class="s1">'unknown protocol'</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_sout</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">msg</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">verbose</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">:</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">]: </span><span class="si">{</span><span class="n">msg</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">__check_error</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">error</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">error</span> <span class="ow">in</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
|
||||
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">error</span><span class="p">)</span>
|
||||
<span class="k">elif</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">error</span><span class="p">,</span> <span class="s1">'__call__'</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error</span> <span class="o">=</span> <span class="n">error</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s1">'unexpected error type; must either be a callable function or a str representing</span><span class="se">\n</span><span class="s1">'</span>
|
||||
<span class="sa">f</span><span class="s1">'the name of an error function in </span><span class="si">{</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_prepare_classifier</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">):</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">cls_params</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">cls_params</span><span class="p">)</span>
|
||||
<span class="n">predictions</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">classifier_fit_predict</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">predictions</span>
|
||||
|
||||
<span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[classifier fit] hyperparams=</span><span class="si">{</span><span class="n">cls_params</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_prepare_aggregation</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">cls_took</span><span class="p">,</span> <span class="n">cls_params</span><span class="p">,</span> <span class="n">q_params</span> <span class="o">=</span> <span class="n">args</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="n">model</span><span class="p">)</span>
|
||||
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="o">**</span><span class="n">cls_params</span><span class="p">,</span> <span class="o">**</span><span class="n">q_params</span><span class="p">}</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">q_params</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">q_params</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">aggregation_fit</span><span class="p">(</span><span class="n">predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">score</span>
|
||||
|
||||
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">q_params</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">aggr_took</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="p">(</span><span class="n">cls_took</span><span class="o">+</span><span class="n">aggr_took</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_prepare_nonaggr_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">job</span><span class="p">(</span><span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">set_params</span><span class="p">(</span><span class="o">**</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_training</span><span class="p">)</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">score</span>
|
||||
|
||||
<span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_error_handler</span><span class="p">(</span><span class="n">job</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_print_status</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_break_down_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Decides whether to break down the fit phase in two (classifier-fit followed by aggregation-fit).</span>
|
||||
<span class="sd"> In order to do so, some conditions should be met: a) the quantifier is of type aggregative,</span>
|
||||
<span class="sd"> b) the set of hyperparameters can be split into two disjoint non-empty groups.</span>
|
||||
|
||||
<span class="sd"> :return: True if the conditions are met, False otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">model</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">cls_configs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">q_configs</span><span class="p">)</span><span class="o">==</span><span class="mi">1</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="kc">False</span>
|
||||
<span class="k">return</span> <span class="kc">True</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_compute_scores_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
|
||||
<span class="c1"># break down the set of hyperparameters into two: classifier-specific, quantifier-specific</span>
|
||||
<span class="n">cls_configs</span><span class="p">,</span> <span class="n">q_configs</span> <span class="o">=</span> <span class="n">group_params</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># train all classifiers and get the predictions</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
|
||||
<span class="n">cls_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_classifier</span><span class="p">,</span>
|
||||
<span class="n">cls_configs</span><span class="p">,</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="c1"># filter out classifier configurations that yielded any error</span>
|
||||
<span class="n">success_outs</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">),</span> <span class="n">cls_config</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">cls_outs</span><span class="p">,</span> <span class="n">cls_configs</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
||||
<span class="n">success_outs</span><span class="o">.</span><span class="n">append</span><span class="p">((</span><span class="n">model</span><span class="p">,</span> <span class="n">predictions</span><span class="p">,</span> <span class="n">took</span><span class="p">,</span> <span class="n">cls_config</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">success_outs</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'No valid configuration found for the classifier!'</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># explore the quantifier-specific hyperparameters for each valid training configuration</span>
|
||||
<span class="n">aggr_configs</span> <span class="o">=</span> <span class="p">[(</span><span class="o">*</span><span class="n">out</span><span class="p">,</span> <span class="n">q_config</span><span class="p">)</span> <span class="k">for</span> <span class="n">out</span><span class="p">,</span> <span class="n">q_config</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">success_outs</span><span class="p">,</span> <span class="n">q_configs</span><span class="p">)]</span>
|
||||
<span class="n">aggr_outs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_aggregation</span><span class="p">,</span>
|
||||
<span class="n">aggr_configs</span><span class="p">,</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">aggr_outs</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_compute_scores_nonaggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">):</span>
|
||||
<span class="n">configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_training</span> <span class="o">=</span> <span class="n">training</span>
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">parallel</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_prepare_nonaggr_model</span><span class="p">,</span>
|
||||
<span class="n">configs</span><span class="p">,</span>
|
||||
<span class="n">seed</span><span class="o">=</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'_R_SEED'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span>
|
||||
<span class="n">n_jobs</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">scores</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_print_status</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'hyperparams=[</span><span class="si">{</span><span class="n">params</span><span class="si">}</span><span class="s1">]</span><span class="se">\t</span><span class="s1"> got </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> = </span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1"> [took </span><span class="si">{</span><span class="n">took</span><span class="si">:</span><span class="s1">.3f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'error=</span><span class="si">{</span><span class="n">status</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ.fit"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">training</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">""" Learning routine. Fits methods with all combinations of hyperparameters and selects the one minimizing</span>
|
||||
<span class="sd"> the error metric.</span>
|
||||
|
||||
<span class="sd"> :param training: the training set on which to optimize the hyperparameters</span>
|
||||
<span class="sd"> :return: self</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span>
|
||||
<span class="sa">f</span><span class="s1">'"refit" was requested, but the protocol does not implement '</span>
|
||||
<span class="sa">f</span><span class="s1">'the </span><span class="si">{</span><span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1"> interface'</span>
|
||||
<span class="p">)</span>
|
||||
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'starting model selection with n_jobs=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">n_jobs</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_break_down_fit</span><span class="p">():</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_aggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_scores_nonaggregative</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="k">for</span> <span class="n">model</span><span class="p">,</span> <span class="n">params</span><span class="p">,</span> <span class="n">score</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span> <span class="ow">in</span> <span class="n">results</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">status</span><span class="o">.</span><span class="n">success</span><span class="p">():</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">score</span> <span class="o"><</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="o">=</span> <span class="n">score</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span> <span class="o">=</span> <span class="n">params</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span> <span class="o">=</span> <span class="n">model</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">score</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_scores_</span><span class="p">[</span><span class="nb">str</span><span class="p">(</span><span class="n">params</span><span class="p">)]</span> <span class="o">=</span> <span class="n">status</span><span class="o">.</span><span class="n">status</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">status</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'no combination of hyperparameters seemed to work'</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'optimization finished: best params </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_params_</span><span class="si">}</span><span class="s1"> (score=</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score_</span><span class="si">:</span><span class="s1">.5f</span><span class="si">}</span><span class="s1">) '</span>
|
||||
<span class="sa">f</span><span class="s1">'[took </span><span class="si">{</span><span class="n">tend</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s]'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">no_errors</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">no_errors</span><span class="o">></span><span class="mi">0</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'warning: </span><span class="si">{</span><span class="n">no_errors</span><span class="si">}</span><span class="s1"> errors found'</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">err</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">error_collector</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="si">{</span><span class="nb">str</span><span class="p">(</span><span class="n">err</span><span class="p">)</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">refit</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_sout</span><span class="p">(</span><span class="sa">f</span><span class="s1">'refitting on the whole development set'</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">get_labelled_collection</span><span class="p">())</span>
|
||||
<span class="n">tend</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">refit_time_</span> <span class="o">=</span> <span class="n">tend</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># already checked</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeWarning</span><span class="p">(</span><span class="sa">f</span><span class="s1">'the model cannot be refit on the whole dataset'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="bp">self</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ.quantify"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.quantify">[docs]</a> <span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Estimate class prevalence values using the best model found after calling the :meth:`fit` method.</span>
|
||||
|
||||
<span class="sd"> :param instances: sample containing the instances</span>
|
||||
<span class="sd"> :return: an ndarray of shape `(n_classes)` with class prevalence estimates according to the best model found</span>
|
||||
<span class="sd"> by the model selection process.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'best_model_'</span><span class="p">),</span> <span class="s1">'quantify called before fit'</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ.set_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.set_params">[docs]</a> <span class="k">def</span> <span class="nf">set_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">parameters</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Sets the hyper-parameters to explore.</span>
|
||||
|
||||
<span class="sd"> :param parameters: a dictionary with keys the parameter names and values the list of values to explore</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span> <span class="o">=</span> <span class="n">parameters</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ.get_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.get_params">[docs]</a> <span class="k">def</span> <span class="nf">get_params</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""Returns the dictionary of hyper-parameters to explore (`param_grid`)</span>
|
||||
|
||||
<span class="sd"> :param deep: Unused</span>
|
||||
<span class="sd"> :return: the dictionary `param_grid`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">param_grid</span></div>
|
||||
|
||||
<div class="viewcode-block" id="GridSearchQ.best_model"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.GridSearchQ.best_model">[docs]</a> <span class="k">def</span> <span class="nf">best_model</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the best model found after calling the :meth:`fit` method, i.e., the one trained on the combination</span>
|
||||
<span class="sd"> of hyper-parameters that minimized the error function.</span>
|
||||
|
||||
<span class="sd"> :return: a trained quantifier</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="s1">'best_model_'</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_model_</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'best_model called before fit'</span><span class="p">)</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_error_handler</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Wraps the execution of one job so that, in addition to its output, two values are returned: the status, and the time of execution</span>
|
||||
|
||||
<span class="sd"> :param func: the function to be called</span>
|
||||
<span class="sd"> :param params: parameters of the function</span>
|
||||
<span class="sd"> :return: `tuple(out, status, time)` where `out` is the function output,</span>
|
||||
<span class="sd"> `status` is an enum value from `Status`, and `time` is the time it</span>
|
||||
<span class="sd"> took to complete the call</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">_handle</span><span class="p">(</span><span class="n">status</span><span class="p">,</span> <span class="n">exception</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">raise_errors</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="n">exception</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">status</span><span class="p">)</span>
|
||||
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">with</span> <span class="n">timeout</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">timeout</span><span class="p">):</span>
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">output</span> <span class="o">=</span> <span class="n">func</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="n">status</span> <span class="o">=</span> <span class="n">ConfigStatus</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">Status</span><span class="o">.</span><span class="n">SUCCESS</span><span class="p">)</span>
|
||||
|
||||
<span class="k">except</span> <span class="ne">TimeoutError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">TIMEOUT</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
||||
|
||||
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">INVALID</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
||||
|
||||
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
||||
<span class="n">status</span> <span class="o">=</span> <span class="n">_handle</span><span class="p">(</span><span class="n">Status</span><span class="o">.</span><span class="n">ERROR</span><span class="p">,</span> <span class="n">e</span><span class="p">)</span>
|
||||
|
||||
<span class="n">took</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
||||
<span class="k">return</span> <span class="n">output</span><span class="p">,</span> <span class="n">status</span><span class="p">,</span> <span class="n">took</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="cross_val_predict"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.cross_val_predict">[docs]</a><span class="k">def</span> <span class="nf">cross_val_predict</span><span class="p">(</span><span class="n">quantifier</span><span class="p">:</span> <span class="n">BaseQuantifier</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">nfolds</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Akin to `scikit-learn's cross_val_predict <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_predict.html>`_</span>
|
||||
<span class="sd"> but for quantification.</span>
|
||||
|
||||
<span class="sd"> :param quantifier: a quantifier issuing class prevalence values</span>
|
||||
<span class="sd"> :param data: a labelled collection</span>
|
||||
<span class="sd"> :param nfolds: number of folds for k-fold cross validation generation</span>
|
||||
<span class="sd"> :param random_state: random seed for reproducibility</span>
|
||||
<span class="sd"> :return: a vector of class prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">total_prev</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="ow">in</span> <span class="n">data</span><span class="o">.</span><span class="n">kFCV</span><span class="p">(</span><span class="n">nfolds</span><span class="o">=</span><span class="n">nfolds</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||
<span class="n">fold_prev</span> <span class="o">=</span> <span class="n">quantifier</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">rel_size</span> <span class="o">=</span> <span class="mf">1.</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">test</span><span class="p">)</span> <span class="o">/</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||||
<span class="n">total_prev</span> <span class="o">+=</span> <span class="n">fold_prev</span><span class="o">*</span><span class="n">rel_size</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">total_prev</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="expand_grid"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.expand_grid">[docs]</a><span class="k">def</span> <span class="nf">expand_grid</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Expands a param_grid dictionary as a list of configurations.</span>
|
||||
<span class="sd"> Example:</span>
|
||||
|
||||
<span class="sd"> >>> combinations = expand_grid({'A': [1, 10, 100], 'B': [True, False]})</span>
|
||||
<span class="sd"> >>> print(combinations)</span>
|
||||
<span class="sd"> [{'A': 1, 'B': True}, {'A': 1, 'B': False}, {'A': 10, 'B': True}, {'A': 10, 'B': False}, {'A': 100, 'B': True}, {'A': 100, 'B': False}]</span>
|
||||
|
||||
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
|
||||
<span class="sd"> to explore for that hyper-parameter</span>
|
||||
<span class="sd"> :return: a list of configurations, i.e., combinations of hyper-parameter assignments in the grid.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">params_keys</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span>
|
||||
<span class="n">params_values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">param_grid</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
|
||||
<span class="n">configs</span> <span class="o">=</span> <span class="p">[{</span><span class="n">k</span><span class="p">:</span> <span class="n">combs</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">k</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">params_keys</span><span class="p">)}</span> <span class="k">for</span> <span class="n">combs</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">params_values</span><span class="p">)]</span>
|
||||
<span class="k">return</span> <span class="n">configs</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="group_params"><a class="viewcode-back" href="../../quapy.html#quapy.model_selection.group_params">[docs]</a><span class="k">def</span> <span class="nf">group_params</span><span class="p">(</span><span class="n">param_grid</span><span class="p">:</span> <span class="nb">dict</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Partitions a param_grid dictionary as two lists of configurations, one for the classifier-specific</span>
|
||||
<span class="sd"> hyper-parameters, and another for the quantifier-specific hyper-parameters</span>
|
||||
|
||||
<span class="sd"> :param param_grid: dictionary with keys representing hyper-parameter names, and values representing the range</span>
|
||||
<span class="sd"> to explore for that hyper-parameter</span>
|
||||
<span class="sd"> :return: two expanded grids of configurations, one for the classifier, another for the quantifier</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">classifier_params</span><span class="p">,</span> <span class="n">quantifier_params</span> <span class="o">=</span> <span class="p">{},</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">values</span> <span class="ow">in</span> <span class="n">param_grid</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
|
||||
<span class="k">if</span> <span class="n">key</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">'classifier__'</span><span class="p">)</span> <span class="ow">or</span> <span class="n">key</span> <span class="o">==</span> <span class="s1">'val_split'</span><span class="p">:</span>
|
||||
<span class="n">classifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">quantifier_params</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">values</span>
|
||||
|
||||
<span class="n">classifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">classifier_params</span><span class="p">)</span>
|
||||
<span class="n">quantifier_configs</span> <span class="o">=</span> <span class="n">expand_grid</span><span class="p">(</span><span class="n">quantifier_params</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">classifier_configs</span><span class="p">,</span> <span class="n">quantifier_configs</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,687 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.plot — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.plot</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.plot</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">defaultdict</span>
|
||||
<span class="kn">import</span> <span class="nn">matplotlib.pyplot</span> <span class="k">as</span> <span class="nn">plt</span>
|
||||
<span class="kn">from</span> <span class="nn">matplotlib.cm</span> <span class="kn">import</span> <span class="n">get_cmap</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">matplotlib</span> <span class="kn">import</span> <span class="n">cm</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.stats</span> <span class="kn">import</span> <span class="n">ttest_ind_from_stats</span>
|
||||
<span class="kn">from</span> <span class="nn">matplotlib.ticker</span> <span class="kn">import</span> <span class="n">ScalarFormatter</span>
|
||||
<span class="kn">import</span> <span class="nn">math</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'figure.figsize'</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">6</span><span class="p">]</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'figure.dpi'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">200</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s1">'font.size'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">18</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="binary_diagonal">
|
||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_diagonal">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">binary_diagonal</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">train_prev</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The diagonal plot displays the predicted prevalence values (along the y-axis) as a function of the true prevalence</span>
|
||||
<span class="sd"> values (along the x-axis). The optimal quantifier is described by the diagonal (0,0)-(1,1) of the plot (hence the</span>
|
||||
<span class="sd"> name). It is convenient for binary quantification problems, though it can be used for multiclass problems by</span>
|
||||
<span class="sd"> indicating which class is to be taken as the positive class. (For multiclass quantification problems, other plots</span>
|
||||
<span class="sd"> like the :meth:`error_by_drift` might be preferable though).</span>
|
||||
|
||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
||||
<span class="sd"> each experiment</span>
|
||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
||||
<span class="sd"> for each experiment</span>
|
||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
||||
<span class="sd"> :param show_std: whether or not to show standard deviations (represented by color bands). This might be inconvenient</span>
|
||||
<span class="sd"> for cases in which many methods are compared, or when the standard deviations are high (default True)</span>
|
||||
<span class="sd"> :param legend: whether or not to display the legend (default True)</span>
|
||||
<span class="sd"> :param train_prev: if indicated (default is None), the training prevalence (for the positive class) is highlighted</span>
|
||||
<span class="sd"> in the plot. This is convenient when all the experiments have been conducted in the same dataset.</span>
|
||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="s1">'equal'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">'--k'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'ideal'</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
||||
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">table</span> <span class="o">=</span> <span class="p">{</span><span class="n">method_name</span><span class="p">:[</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span><span class="p">]</span> <span class="k">for</span> <span class="n">method_name</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">}</span>
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="p">[(</span><span class="n">method_name</span><span class="p">,</span> <span class="o">*</span><span class="n">table</span><span class="p">[</span><span class="n">method_name</span><span class="p">])</span> <span class="k">for</span> <span class="n">method_name</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
||||
|
||||
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">NUM_COLORS</span><span class="o">></span><span class="mi">10</span><span class="p">:</span>
|
||||
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">'tab20'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
|
||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="n">order</span><span class="p">:</span>
|
||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
|
||||
<span class="n">x_ticks</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">true_prev</span><span class="p">)</span>
|
||||
<span class="n">x_ticks</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
|
||||
<span class="n">y_ave</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
|
||||
<span class="n">y_std</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">true_prev</span> <span class="o">==</span> <span class="n">x</span><span class="p">]</span><span class="o">.</span><span class="n">std</span><span class="p">()</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">x_ticks</span><span class="p">])</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">x_ticks</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">-</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">y_ave</span> <span class="o">+</span> <span class="n">y_std</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">train_prev</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">train_prev</span> <span class="o">=</span> <span class="n">train_prev</span><span class="p">[</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">scatter</span><span class="p">(</span><span class="n">train_prev</span><span class="p">,</span> <span class="n">train_prev</span><span class="p">,</span> <span class="n">c</span><span class="o">=</span><span class="s1">'c'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'tr-prev'</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span> <span class="n">s</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">'true prevalence'</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">'estimated prevalence'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">'center left'</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
|
||||
<span class="c1"># box = ax.get_position()</span>
|
||||
<span class="c1"># ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])</span>
|
||||
<span class="c1"># ax.legend(loc='lower center',</span>
|
||||
<span class="c1"># bbox_to_anchor=(1, -0.5),</span>
|
||||
<span class="c1"># ncol=(len(method_names)+1)//2)</span>
|
||||
|
||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="binary_bias_global">
|
||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_global">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">binary_bias_global</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Box-plots displaying the global bias (i.e., signed error computed as the estimated value minus the true value)</span>
|
||||
<span class="sd"> for each quantification method with respect to a given positive class.</span>
|
||||
|
||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
||||
<span class="sd"> each experiment</span>
|
||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
||||
<span class="sd"> for each experiment</span>
|
||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
||||
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
||||
|
||||
<span class="n">data</span><span class="p">,</span> <span class="n">labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
<span class="n">data</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="o">-</span><span class="n">true_prev</span><span class="p">)</span>
|
||||
<span class="n">labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">boxplot</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">xticks</span><span class="p">(</span><span class="n">rotation</span><span class="o">=</span><span class="mi">45</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">ylabel</span><span class="o">=</span><span class="s1">'error bias'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
||||
|
||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="binary_bias_bins">
|
||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.binary_bias_bins">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">binary_bias_bins</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">pos_class</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nbins</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">colormap</span><span class="o">=</span><span class="n">cm</span><span class="o">.</span><span class="n">tab10</span><span class="p">,</span>
|
||||
<span class="n">vertical_xticks</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Box-plots displaying the local bias (i.e., signed error computed as the estimated value minus the true value)</span>
|
||||
<span class="sd"> for different bins of (true) prevalence of the positive class, for each quantification method.</span>
|
||||
|
||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
||||
<span class="sd"> each experiment</span>
|
||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
||||
<span class="sd"> for each experiment</span>
|
||||
<span class="sd"> :param pos_class: index of the positive class</span>
|
||||
<span class="sd"> :param title: the title to be displayed in the plot</span>
|
||||
<span class="sd"> :param nbins: number of bins</span>
|
||||
<span class="sd"> :param colormap: the matplotlib colormap to use (default cm.tab10)</span>
|
||||
<span class="sd"> :param vertical_xticks: whether or not to add secondary grid (default is False)</span>
|
||||
<span class="sd"> :param legend: whether or not to display the legend (default is True)</span>
|
||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="kn">from</span> <span class="nn">pylab</span> <span class="kn">import</span> <span class="n">boxplot</span><span class="p">,</span> <span class="n">plot</span><span class="p">,</span> <span class="n">setp</span>
|
||||
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
||||
|
||||
<span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">)</span>
|
||||
|
||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">nbins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span><span class="o">/</span><span class="n">nbins</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="p">{}</span>
|
||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
||||
<span class="n">true_prev</span> <span class="o">=</span> <span class="n">true_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
<span class="n">estim_prev</span> <span class="o">=</span> <span class="n">estim_prev</span><span class="p">[:,</span><span class="n">pos_class</span><span class="p">]</span>
|
||||
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">true_prev</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
|
||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">]</span> <span class="o">-</span> <span class="n">true_prev</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
||||
|
||||
<span class="n">nmethods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span>
|
||||
<span class="n">boxwidth</span> <span class="o">=</span> <span class="n">binwidth</span><span class="o">/</span><span class="p">(</span><span class="n">nmethods</span><span class="o">+</span><span class="mi">4</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="nb">bin</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
|
||||
<span class="n">boxdata</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_names</span><span class="p">]</span>
|
||||
<span class="n">positions</span> <span class="o">=</span> <span class="p">[</span><span class="nb">bin</span><span class="o">+</span><span class="p">(</span><span class="n">i</span><span class="o">*</span><span class="n">boxwidth</span><span class="p">)</span><span class="o">+</span><span class="mi">2</span><span class="o">*</span><span class="n">boxwidth</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">_</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_names</span><span class="p">)]</span>
|
||||
<span class="n">box</span> <span class="o">=</span> <span class="n">boxplot</span><span class="p">(</span><span class="n">boxdata</span><span class="p">,</span> <span class="n">showmeans</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">positions</span><span class="o">=</span><span class="n">positions</span><span class="p">,</span> <span class="n">widths</span> <span class="o">=</span> <span class="n">boxwidth</span><span class="p">,</span> <span class="n">sym</span><span class="o">=</span><span class="s1">'+'</span><span class="p">,</span> <span class="n">patch_artist</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">boxid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
|
||||
<span class="n">c</span> <span class="o">=</span> <span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">boxid</span><span class="o">%</span><span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
|
||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'fliers'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'+'</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mf">3.</span><span class="p">,</span> <span class="n">markeredgecolor</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
|
||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'boxes'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
|
||||
<span class="n">setp</span><span class="p">(</span><span class="n">box</span><span class="p">[</span><span class="s1">'medians'</span><span class="p">][</span><span class="n">boxid</span><span class="p">],</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">major_xticks_positions</span><span class="p">,</span> <span class="n">minor_xticks_positions</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="n">major_xticks_labels</span><span class="p">,</span> <span class="n">minor_xticks_labels</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]):</span>
|
||||
<span class="n">major_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span><span class="p">)</span>
|
||||
<span class="n">minor_xticks_positions</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">b</span> <span class="o">+</span> <span class="n">binwidth</span> <span class="o">/</span> <span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">major_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">''</span><span class="p">)</span>
|
||||
<span class="n">minor_xticks_labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s1">'[</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">-</span><span class="si">{</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">1</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">)'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">major_xticks_positions</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticks</span><span class="p">(</span><span class="n">minor_xticks_positions</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">major_xticks_labels</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xticklabels</span><span class="p">(</span><span class="n">minor_xticks_labels</span><span class="p">,</span> <span class="n">minor</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="s1">'vertical'</span> <span class="k">if</span> <span class="n">vertical_xticks</span> <span class="k">else</span> <span class="s1">'horizontal'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">vertical_xticks</span><span class="p">:</span>
|
||||
<span class="c1"># Pad margins so that markers don't get clipped by the axes</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">margins</span><span class="p">(</span><span class="mf">0.2</span><span class="p">)</span>
|
||||
<span class="c1"># Tweak spacing to prevent clipping of tick-labels</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">bottom</span><span class="o">=</span><span class="mf">0.15</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">legend</span><span class="p">:</span>
|
||||
<span class="c1"># adds the legend to the list hs, initialized with the "ideal" quantifier (one that has 0 bias across all bins. i.e.</span>
|
||||
<span class="c1"># a line from (0,0) to (1,0). The other elements are simply labelled dot-plots that are to be removed (setting</span>
|
||||
<span class="c1"># set_visible to False for all but the first element) after the legend has been placed</span>
|
||||
<span class="n">hs</span><span class="o">=</span><span class="p">[</span><span class="n">ax</span><span class="o">.</span><span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">'-k'</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)[</span><span class="mi">0</span><span class="p">]]</span>
|
||||
<span class="k">for</span> <span class="n">colorid</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)):</span>
|
||||
<span class="n">color</span><span class="o">=</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">[</span><span class="n">colorid</span> <span class="o">%</span> <span class="nb">len</span><span class="p">(</span><span class="n">colormap</span><span class="o">.</span><span class="n">colors</span><span class="p">)]</span>
|
||||
<span class="n">h</span><span class="p">,</span> <span class="o">=</span> <span class="n">plot</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">'-s'</span><span class="p">,</span> <span class="n">markerfacecolor</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span><span class="n">mec</span><span class="o">=</span><span class="n">color</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.</span><span class="p">)</span>
|
||||
<span class="n">hs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
|
||||
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">hs</span><span class="p">,</span> <span class="p">[</span><span class="s1">'ideal'</span><span class="p">]</span><span class="o">+</span><span class="n">method_names</span><span class="p">,</span> <span class="n">loc</span><span class="o">=</span><span class="s1">'center left'</span><span class="p">,</span> <span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">))</span>
|
||||
<span class="p">[</span><span class="n">h</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">h</span> <span class="ow">in</span> <span class="n">hs</span><span class="p">[</span><span class="mi">1</span><span class="p">:]]</span>
|
||||
|
||||
<span class="c1"># x-axis and y-axis labels and limits</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="s1">'prevalence'</span><span class="p">,</span> <span class="n">ylabel</span><span class="o">=</span><span class="s1">'error bias'</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="error_by_drift">
|
||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.error_by_drift">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">error_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
|
||||
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">error_name</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">show_std</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||||
<span class="n">show_density</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">show_legend</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
|
||||
<span class="n">logscale</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
|
||||
<span class="n">title</span><span class="o">=</span><span class="sa">f</span><span class="s1">'Quantification error as a function of distribution shift'</span><span class="p">,</span>
|
||||
<span class="n">vlines</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Plots the error (along the y-axis, as measured in terms of `error_name`) as a function of the train-test shift</span>
|
||||
<span class="sd"> (along the x-axis, as measured in terms of :meth:`quapy.error.ae`). This plot is useful especially for multiclass</span>
|
||||
<span class="sd"> problems, in which "diagonal plots" may be cumbersome, and in order to gain understanding about how methods</span>
|
||||
<span class="sd"> fare in different regions of the prior probability shift spectrum (e.g., in the low-shift regime vs. in the</span>
|
||||
<span class="sd"> high-shift regime).</span>
|
||||
|
||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
||||
<span class="sd"> each experiment</span>
|
||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
||||
<span class="sd"> for each experiment</span>
|
||||
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
|
||||
<span class="sd"> :param n_bins: number of bins in which the x-axis is to be divided (default is 20)</span>
|
||||
<span class="sd"> :param error_name: a string representing the name of an error function (as defined in `quapy.error`, default is "ae")</span>
|
||||
<span class="sd"> :param show_std: whether or not to show standard deviations as color bands (default is False)</span>
|
||||
<span class="sd"> :param show_density: whether or not to display the distribution of experiments for each bin (default is True)</span>
|
||||
<span class="sd"> :param show_legend: whether or not to display the legend of the chart (default is True)</span>
|
||||
<span class="sd"> :param logscale: whether or not to log-scale the y-error measure (default is False)</span>
|
||||
<span class="sd"> :param title: title of the plot (default is "Quantification error as a function of distribution shift")</span>
|
||||
<span class="sd"> :param vlines: array-like list of values (default is None). If indicated, highlights some regions of the space</span>
|
||||
<span class="sd"> using vertical dotted lines.</span>
|
||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">()</span>
|
||||
|
||||
<span class="n">x_error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">ae</span>
|
||||
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">error_name</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same</span>
|
||||
<span class="c1"># order as in method_order (if specified), and where 'x' are the train-test shifts (computed as according to</span>
|
||||
<span class="c1"># x_error function) and 'y' is the estim-test shift (computed as according to y_error)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
|
||||
|
||||
<span class="n">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">))</span>
|
||||
|
||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">binwidth</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="n">n_bins</span>
|
||||
<span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">,</span> <span class="n">min_y</span><span class="p">,</span> <span class="n">max_y</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
||||
<span class="n">npoints</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span>
|
||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yscale</span><span class="p">(</span><span class="s2">"log"</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">set_major_formatter</span><span class="p">(</span><span class="n">ScalarFormatter</span><span class="p">())</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">yaxis</span><span class="o">.</span><span class="n">get_major_formatter</span><span class="p">()</span><span class="o">.</span><span class="n">set_scientific</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">minorticks_off</span><span class="p">()</span>
|
||||
|
||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">ystds</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">p</span><span class="p">,</span><span class="n">ind</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))):</span>
|
||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span><span class="o">==</span><span class="n">ind</span>
|
||||
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">xs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ind</span><span class="o">*</span><span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">ys</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
|
||||
<span class="n">ystds</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">]))</span>
|
||||
<span class="n">npoints</span><span class="p">[</span><span class="n">p</span><span class="p">]</span> <span class="o">+=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
||||
|
||||
<span class="n">xs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">xs</span><span class="p">)</span>
|
||||
<span class="n">ys</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ys</span><span class="p">)</span>
|
||||
<span class="n">ystds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">ystds</span><span class="p">)</span>
|
||||
|
||||
<span class="n">min_x_method</span><span class="p">,</span> <span class="n">max_x_method</span><span class="p">,</span> <span class="n">min_y_method</span><span class="p">,</span> <span class="n">max_y_method</span> <span class="o">=</span> <span class="n">xs</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">xs</span><span class="o">.</span><span class="n">max</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">min</span><span class="p">(),</span> <span class="n">ys</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
|
||||
<span class="n">min_x</span> <span class="o">=</span> <span class="n">min_x_method</span> <span class="k">if</span> <span class="n">min_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_x_method</span> <span class="o"><</span> <span class="n">min_x</span> <span class="k">else</span> <span class="n">min_x</span>
|
||||
<span class="n">max_x</span> <span class="o">=</span> <span class="n">max_x_method</span> <span class="k">if</span> <span class="n">max_x</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_x_method</span> <span class="o">></span> <span class="n">max_x</span> <span class="k">else</span> <span class="n">max_x</span>
|
||||
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">></span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
|
||||
<span class="n">min_y</span> <span class="o">=</span> <span class="n">min_y_method</span> <span class="k">if</span> <span class="n">min_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">min_y_method</span> <span class="o"><</span> <span class="n">min_y</span> <span class="k">else</span> <span class="n">min_y</span>
|
||||
<span class="n">max_y</span> <span class="o">=</span> <span class="n">max_y_method</span> <span class="k">if</span> <span class="n">max_y</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">max_y_method</span> <span class="o">></span> <span class="n">max_y</span> <span class="k">else</span> <span class="n">max_y</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'w'</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">errorbar</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="p">,</span> <span class="n">fmt</span><span class="o">=</span><span class="s1">'-'</span><span class="p">,</span> <span class="n">marker</span><span class="o">=</span><span class="s1">'o'</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">markersize</span><span class="o">=</span><span class="mi">6</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">show_std</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">xs</span><span class="p">,</span> <span class="n">ys</span><span class="o">-</span><span class="n">ystds</span><span class="p">,</span> <span class="n">ys</span><span class="o">+</span><span class="n">ystds</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.25</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">show_density</span><span class="p">:</span>
|
||||
<span class="n">ax2</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">twinx</span><span class="p">()</span>
|
||||
<span class="n">densities</span> <span class="o">=</span> <span class="n">npoints</span><span class="o">/</span><span class="n">np</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">npoints</span><span class="p">)</span>
|
||||
<span class="n">ax2</span><span class="o">.</span><span class="n">bar</span><span class="p">([</span><span class="n">ind</span> <span class="o">*</span> <span class="n">binwidth</span><span class="o">-</span><span class="n">binwidth</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">ind</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">))],</span>
|
||||
<span class="n">densities</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.15</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'g'</span><span class="p">,</span> <span class="n">width</span><span class="o">=</span><span class="n">binwidth</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s1">'density'</span><span class="p">)</span>
|
||||
<span class="n">ax2</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="nb">max</span><span class="p">(</span><span class="n">densities</span><span class="p">))</span>
|
||||
<span class="n">ax2</span><span class="o">.</span><span class="n">spines</span><span class="p">[</span><span class="s1">'right'</span><span class="p">]</span><span class="o">.</span><span class="n">set_color</span><span class="p">(</span><span class="s1">'g'</span><span class="p">)</span>
|
||||
<span class="n">ax2</span><span class="o">.</span><span class="n">tick_params</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">'y'</span><span class="p">,</span> <span class="n">colors</span><span class="o">=</span><span class="s1">'g'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">xlabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">'Distribution shift between training set and test sample'</span><span class="p">,</span>
|
||||
<span class="n">ylabel</span><span class="o">=</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">error_name</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span><span class="si">}</span><span class="s1"> (true distribution, predicted distribution)'</span><span class="p">,</span>
|
||||
<span class="n">title</span><span class="o">=</span><span class="n">title</span><span class="p">)</span>
|
||||
<span class="n">box</span> <span class="o">=</span> <span class="n">ax</span><span class="o">.</span><span class="n">get_position</span><span class="p">()</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_position</span><span class="p">([</span><span class="n">box</span><span class="o">.</span><span class="n">x0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">y0</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">width</span> <span class="o">*</span> <span class="mf">0.8</span><span class="p">,</span> <span class="n">box</span><span class="o">.</span><span class="n">height</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">vlines</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">vline</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">vline</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">'--'</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">min_x</span><span class="p">,</span> <span class="n">max_x</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">logscale</span><span class="p">:</span>
|
||||
<span class="c1"># nice scale for the logarithmic axis</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">10</span> <span class="o">**</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">math</span><span class="o">.</span><span class="n">log10</span><span class="p">(</span><span class="n">max_y</span><span class="p">)))</span>
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="n">show_legend</span><span class="p">:</span>
|
||||
<span class="n">fig</span><span class="o">.</span><span class="n">legend</span><span class="p">(</span><span class="n">loc</span><span class="o">=</span><span class="s1">'lower center'</span><span class="p">,</span>
|
||||
<span class="n">bbox_to_anchor</span><span class="o">=</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">),</span>
|
||||
<span class="n">ncol</span><span class="o">=</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">method_names</span><span class="p">)</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span><span class="o">//</span><span class="mi">2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="brokenbar_supremacy_by_drift">
|
||||
<a class="viewcode-back" href="../../quapy.html#quapy.plot.brokenbar_supremacy_by_drift">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">brokenbar_supremacy_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span>
|
||||
<span class="n">n_bins</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">binning</span><span class="o">=</span><span class="s1">'isomerous'</span><span class="p">,</span>
|
||||
<span class="n">x_error</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">y_error</span><span class="o">=</span><span class="s1">'ae'</span><span class="p">,</span> <span class="n">ttest_alpha</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span> <span class="n">tail_density_threshold</span><span class="o">=</span><span class="mf">0.005</span><span class="p">,</span>
|
||||
<span class="n">method_order</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">savepath</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Displays (only) the top performing methods for different regions of the train-test shift in form of a broken</span>
|
||||
<span class="sd"> bar chart, in which each method has bars only for those regions in which either one of the following conditions</span>
|
||||
<span class="sd"> hold: (i) it is the best method (in average) for the bin, or (ii) it is not statistically significantly different</span>
|
||||
<span class="sd"> (in average) from the best one, according to a two-sided t-test on independent samples at significance level `ttest_alpha`.</span>
|
||||
<span class="sd"> The binning can be made "isometric" (same size), or "isomerous" (same number of experiments -- default). A second</span>
|
||||
<span class="sd"> plot is displayed on top, that displays the distribution of experiments for each bin (when binning="isometric") or</span>
|
||||
<span class="sd"> the percentile points of the distribution (when binning="isomerous").</span>
|
||||
|
||||
<span class="sd"> :param method_names: array-like with the method names for each experiment</span>
|
||||
<span class="sd"> :param true_prevs: array-like with the true prevalence values (each being a ndarray with n_classes components) for</span>
|
||||
<span class="sd"> each experiment</span>
|
||||
<span class="sd"> :param estim_prevs: array-like with the estimated prevalence values (each being a ndarray with n_classes components)</span>
|
||||
<span class="sd"> for each experiment</span>
|
||||
<span class="sd"> :param tr_prevs: training prevalence of each experiment</span>
|
||||
<span class="sd"> :param n_bins: number of bins in which the x-axis (the train-test shift) is to be divided (default is 20)</span>
|
||||
<span class="sd"> :param binning: type of binning, either "isomerous" (default) or "isometric"</span>
|
||||
<span class="sd"> :param x_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
|
||||
<span class="sd"> measuring the amount of train-test shift (default is "ae")</span>
|
||||
<span class="sd"> :param y_error: a string representing the name of an error function (as defined in `quapy.error`) to be used for</span>
|
||||
<span class="sd"> measuring the amount of error in the prevalence estimations (default is "ae")</span>
|
||||
<span class="sd"> :param ttest_alpha: the significance level above which a p-value (two-sided t-test on independent samples) is</span>
|
||||
<span class="sd"> to be considered as an indicator that the two means are not statistically significantly different. Default is</span>
|
||||
<span class="sd"> 0.005, meaning that a `p-value > 0.005` indicates the two methods involved are to be considered similar</span>
|
||||
<span class="sd"> :param tail_density_threshold: sets a threshold on the density of experiments (over the total number of experiments)</span>
|
||||
<span class="sd"> below which a bin in the tail (i.e., the right-most ones) will be discarded. This is in order to avoid some</span>
|
||||
<span class="sd"> bins to be shown for train-test outliers.</span>
|
||||
<span class="sd"> :param method_order: if indicated (default is None), imposes the order in which the methods are processed (i.e.,</span>
|
||||
<span class="sd"> listed in the legend and associated with matplotlib colors).</span>
|
||||
<span class="sd"> :param savepath: path where to save the plot. If not indicated (as default), the plot is shown.</span>
|
||||
<span class="sd"> :return:</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">binning</span> <span class="ow">in</span> <span class="p">[</span><span class="s1">'isomerous'</span><span class="p">,</span> <span class="s1">'isometric'</span><span class="p">],</span> <span class="s1">'unknown binning type; valid types are "isomerous" and "isometric"'</span>
|
||||
|
||||
<span class="n">x_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">x_error</span><span class="p">)</span>
|
||||
<span class="n">y_error</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># get all data as a dictionary {'m':{'x':ndarray, 'y':ndarray}} where 'm' is a method name (in the same</span>
|
||||
<span class="c1"># order as in method_order (if specified)), and where 'x' are the train-test shifts (computed as according to</span>
|
||||
<span class="c1"># x_error function) and 'y' is the estim-test shift (computed as according to y_error)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">method_order</span> <span class="o">=</span> <span class="n">method_names</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">'isomerous'</span><span class="p">:</span>
|
||||
<span class="c1"># take bins containing the same amount of examples</span>
|
||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">])</span>
|
||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">q</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">))</span><span class="o">.</span><span class="n">flatten</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># take equidistant bins</span>
|
||||
<span class="n">bins</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">n_bins</span><span class="o">+</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mf">0.001</span>
|
||||
<span class="n">bins</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span> <span class="o">+=</span> <span class="mf">0.001</span>
|
||||
|
||||
<span class="c1"># we use this to keep track of how many datapoints contribute to each bin</span>
|
||||
<span class="n">inds_histogram_global</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">n_bins</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">float</span><span class="p">)</span>
|
||||
<span class="n">n_methods</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">method_order</span><span class="p">)</span>
|
||||
<span class="n">buckets</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="n">n_methods</span><span class="p">,</span> <span class="n">n_bins</span><span class="p">,</span> <span class="mi">3</span><span class="p">))</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">method</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">method_order</span><span class="p">):</span>
|
||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span>
|
||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span>
|
||||
|
||||
<span class="n">inds</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">digitize</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">bins</span><span class="p">,</span> <span class="n">right</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">inds_histogram_global</span> <span class="o">+=</span> <span class="n">np</span><span class="o">.</span><span class="n">histogram</span><span class="p">(</span><span class="n">tr_test_drifts</span><span class="p">,</span> <span class="n">density</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="n">bins</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)):</span>
|
||||
<span class="n">selected</span> <span class="o">=</span> <span class="n">inds</span> <span class="o">==</span> <span class="n">j</span>
|
||||
<span class="k">if</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">std</span><span class="p">(</span><span class="n">method_drifts</span><span class="p">[</span><span class="n">selected</span><span class="p">])</span>
|
||||
<span class="n">buckets</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="n">selected</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># cancel last buckets with low density</span>
|
||||
<span class="n">histogram</span> <span class="o">=</span> <span class="n">inds_histogram_global</span> <span class="o">/</span> <span class="n">inds_histogram_global</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">tail</span> <span class="ow">in</span> <span class="nb">reversed</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">histogram</span><span class="p">))):</span>
|
||||
<span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">tail</span><span class="p">]</span> <span class="o"><</span> <span class="n">tail_density_threshold</span><span class="p">:</span>
|
||||
<span class="n">buckets</span><span class="p">[:,</span><span class="n">tail</span><span class="p">,</span><span class="mi">2</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">break</span>
|
||||
|
||||
<span class="n">salient_methods</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
|
||||
<span class="n">best_methods</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">bucket</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">buckets</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]):</span>
|
||||
<span class="n">nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
|
||||
<span class="k">if</span> <span class="n">nc</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">([])</span>
|
||||
<span class="k">continue</span>
|
||||
|
||||
<span class="n">order</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="n">buckets</span><span class="p">[:,</span> <span class="n">bucket</span><span class="p">,</span> <span class="mi">0</span><span class="p">])</span>
|
||||
<span class="n">rank1</span> <span class="o">=</span> <span class="n">order</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">best_bucket_methods</span> <span class="o">=</span> <span class="p">[</span><span class="n">method_order</span><span class="p">[</span><span class="n">rank1</span><span class="p">]]</span>
|
||||
<span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">rank1</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
|
||||
<span class="k">for</span> <span class="n">method_index</span> <span class="ow">in</span> <span class="n">order</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
|
||||
<span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span> <span class="o">=</span> <span class="n">buckets</span><span class="p">[</span><span class="n">method_index</span><span class="p">,</span> <span class="n">bucket</span><span class="p">,</span> <span class="p">:]</span>
|
||||
<span class="n">_</span><span class="p">,</span> <span class="n">pval</span> <span class="o">=</span> <span class="n">ttest_ind_from_stats</span><span class="p">(</span><span class="n">best_mean</span><span class="p">,</span> <span class="n">best_std</span><span class="p">,</span> <span class="n">best_nc</span><span class="p">,</span> <span class="n">method_mean</span><span class="p">,</span> <span class="n">method_std</span><span class="p">,</span> <span class="n">method_nc</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">pval</span> <span class="o">></span> <span class="n">ttest_alpha</span><span class="p">:</span>
|
||||
<span class="n">best_bucket_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method_order</span><span class="p">[</span><span class="n">method_index</span><span class="p">])</span>
|
||||
<span class="n">best_methods</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
||||
<span class="n">salient_methods</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">best_bucket_methods</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">binning</span><span class="o">==</span><span class="s1">'isomerous'</span><span class="p">:</span>
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">'height_ratios'</span><span class="p">:</span> <span class="p">[</span><span class="mf">0.2</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">gridspec_kw</span><span class="o">=</span><span class="p">{</span><span class="s1">'height_ratios'</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">salient_methods</span><span class="p">)))</span>
|
||||
|
||||
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">high_from</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="n">yticks</span><span class="p">,</span> <span class="n">yticks_method_names</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="n">color</span> <span class="o">=</span> <span class="n">get_cmap</span><span class="p">(</span><span class="s1">'Accent'</span><span class="p">)</span><span class="o">.</span><span class="n">colors</span>
|
||||
<span class="n">vlines</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">bar_high</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="k">for</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="n">m</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span> <span class="k">if</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">salient_methods</span><span class="p">]:</span>
|
||||
<span class="n">broken_paths</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
||||
<span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">best_bucket_methods</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">best_methods</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">in</span> <span class="n">best_bucket_methods</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">path_start</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="n">path_end</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">path_start</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">path_end</span> <span class="o">+=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
|
||||
<span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span>
|
||||
<span class="k">if</span> <span class="n">path_start</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">broken_paths</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="nb">tuple</span><span class="p">((</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span><span class="p">)))</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">(</span><span class="n">broken_paths</span><span class="p">,</span> <span class="p">(</span><span class="n">high_from</span><span class="p">,</span> <span class="n">bar_high</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="n">color</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)])</span>
|
||||
<span class="n">yticks</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">high_from</span><span class="o">+</span><span class="n">bar_high</span><span class="o">/</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">high_from</span> <span class="o">+=</span> <span class="n">bar_high</span>
|
||||
<span class="n">yticks_method_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">path_start</span><span class="p">,</span> <span class="n">path_end</span> <span class="ow">in</span> <span class="n">broken_paths</span><span class="p">:</span>
|
||||
<span class="n">vlines</span><span class="o">.</span><span class="n">extend</span><span class="p">([</span><span class="n">path_start</span><span class="p">,</span> <span class="n">path_start</span><span class="o">+</span><span class="n">path_end</span><span class="p">])</span>
|
||||
|
||||
<span class="n">vlines</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
|
||||
<span class="n">vlines</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">vlines</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">vlines</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">axvline</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">v</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'k'</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="s1">'--'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">high_from</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlabel</span><span class="p">(</span><span class="s1">'Distribution shift between training set and sample'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticks</span><span class="p">(</span><span class="n">yticks</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_yticklabels</span><span class="p">(</span><span class="n">yticks_method_names</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># upper plot (explaining distribution)</span>
|
||||
<span class="n">ax</span> <span class="o">=</span> <span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="k">if</span> <span class="n">binning</span> <span class="o">==</span> <span class="s1">'isometric'</span><span class="p">:</span>
|
||||
<span class="c1"># show the density for each region</span>
|
||||
<span class="n">bins</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">=</span><span class="mi">0</span>
|
||||
<span class="n">y_pos</span> <span class="o">=</span> <span class="p">[</span><span class="n">b</span><span class="o">+</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">)</span><span class="o">/</span><span class="mi">2</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span><span class="n">b</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">></span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">bar_width</span> <span class="o">=</span> <span class="p">[</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">]))</span> <span class="k">if</span> <span class="n">histogram</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">></span><span class="mi">0</span><span class="p">]</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">bar</span><span class="p">(</span><span class="n">y_pos</span><span class="p">,</span> <span class="p">[</span><span class="n">n</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="n">histogram</span> <span class="k">if</span> <span class="n">n</span><span class="o">></span><span class="mi">0</span><span class="p">],</span> <span class="n">bar_width</span><span class="p">,</span> <span class="n">align</span><span class="o">=</span><span class="s1">'center'</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">'silver'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">'shift</span><span class="se">\n</span><span class="s1">distribution'</span><span class="p">,</span> <span class="n">rotation</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'right'</span><span class="p">,</span> <span class="n">va</span><span class="o">=</span><span class="s1">'center'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mf">0.1</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="c1"># show the percentiles of the distribution</span>
|
||||
<span class="n">cumsum</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">cumsum</span><span class="p">(</span><span class="n">histogram</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])):</span>
|
||||
<span class="n">start</span><span class="p">,</span> <span class="n">width</span> <span class="o">=</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">broken_barh</span><span class="p">([</span><span class="nb">tuple</span><span class="p">((</span><span class="n">start</span><span class="p">,</span> <span class="n">width</span><span class="p">))],</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">),</span> <span class="n">facecolors</span><span class="o">=</span><span class="s1">'whitesmoke'</span> <span class="k">if</span> <span class="n">i</span><span class="o">%</span><span class="mi">2</span><span class="o">==</span><span class="mi">0</span> <span class="k">else</span> <span class="s1">'silver'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">i</span> <span class="o"><</span> <span class="nb">len</span><span class="p">(</span><span class="n">bins</span><span class="p">)</span><span class="o">-</span><span class="mi">2</span><span class="p">:</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">bins</span><span class="p">[</span><span class="n">i</span><span class="o">+</span><span class="mi">1</span><span class="p">],</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'$P_{'</span><span class="o">+</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">cumsum</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">*</span><span class="mi">100</span><span class="p">))</span><span class="si">}</span><span class="s1">'</span><span class="o">+</span><span class="s1">'}$'</span><span class="p">,</span> <span class="n">ha</span><span class="o">=</span><span class="s1">'center'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="n">vlines</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">vlines</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_yaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">get_xaxis</span><span class="p">()</span><span class="o">.</span><span class="n">set_visible</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">subplots_adjust</span><span class="p">(</span><span class="n">wspace</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">hspace</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_merge</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
||||
<span class="n">ndims</span> <span class="o">=</span> <span class="n">true_prevs</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">'true'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">)),</span> <span class="s1">'estim'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">ndims</span><span class="p">))})</span>
|
||||
<span class="n">method_order</span><span class="o">=</span><span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">true_prev</span><span class="p">,</span> <span class="n">estim_prev</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">):</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'true'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'true'</span><span class="p">],</span> <span class="n">true_prev</span><span class="p">])</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">],</span> <span class="n">estim_prev</span><span class="p">])</span>
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
||||
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
||||
<span class="n">true_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'true'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
||||
<span class="n">estim_prevs_</span> <span class="o">=</span> <span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">m</span><span class="p">][</span><span class="s1">'estim'</span><span class="p">]</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">]</span>
|
||||
<span class="k">return</span> <span class="n">method_order</span><span class="p">,</span> <span class="n">true_prevs_</span><span class="p">,</span> <span class="n">estim_prevs_</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_set_colors</span><span class="p">(</span><span class="n">ax</span><span class="p">,</span> <span class="n">n_methods</span><span class="p">):</span>
|
||||
<span class="n">NUM_COLORS</span> <span class="o">=</span> <span class="n">n_methods</span>
|
||||
<span class="n">cm</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">get_cmap</span><span class="p">(</span><span class="s1">'tab20'</span><span class="p">)</span>
|
||||
<span class="n">ax</span><span class="o">.</span><span class="n">set_prop_cycle</span><span class="p">(</span><span class="n">color</span><span class="o">=</span><span class="p">[</span><span class="n">cm</span><span class="p">(</span><span class="mf">1.</span> <span class="o">*</span> <span class="n">i</span> <span class="o">/</span> <span class="n">NUM_COLORS</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">NUM_COLORS</span><span class="p">)])</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_save_or_show</span><span class="p">(</span><span class="n">savepath</span><span class="p">):</span>
|
||||
<span class="c1"># if savepath is specified, then saves the plot in that path; otherwise the plot is shown</span>
|
||||
<span class="k">if</span> <span class="n">savepath</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">create_parent_dir</span><span class="p">(</span><span class="n">savepath</span><span class="p">)</span>
|
||||
<span class="c1"># plt.tight_layout()</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">savefig</span><span class="p">(</span><span class="n">savepath</span><span class="p">,</span> <span class="n">bbox_inches</span><span class="o">=</span><span class="s1">'tight'</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_join_data_by_drift</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">,</span> <span class="n">x_error</span><span class="p">,</span> <span class="n">y_error</span><span class="p">,</span> <span class="n">method_order</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">defaultdict</span><span class="p">(</span><span class="k">lambda</span><span class="p">:</span> <span class="p">{</span><span class="s1">'x'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span> <span class="s1">'y'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">empty</span><span class="p">(</span><span class="n">shape</span><span class="o">=</span><span class="p">(</span><span class="mi">0</span><span class="p">))})</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">method_order</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">method_order</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
|
||||
<span class="k">for</span> <span class="n">method</span><span class="p">,</span> <span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">method_names</span><span class="p">,</span> <span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">,</span> <span class="n">tr_prevs</span><span class="p">):</span>
|
||||
<span class="n">tr_prev_i</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">tr_prev_i</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">),</span> <span class="n">repeats</span><span class="o">=</span><span class="n">test_prevs_i</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tr_test_drifts</span> <span class="o">=</span> <span class="n">x_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">tr_prev_i</span><span class="p">)</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'x'</span><span class="p">],</span> <span class="n">tr_test_drifts</span><span class="p">])</span>
|
||||
|
||||
<span class="n">method_drifts</span> <span class="o">=</span> <span class="n">y_error</span><span class="p">(</span><span class="n">test_prevs_i</span><span class="p">,</span> <span class="n">estim_prevs_i</span><span class="p">)</span>
|
||||
<span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">([</span><span class="n">data</span><span class="p">[</span><span class="n">method</span><span class="p">][</span><span class="s1">'y'</span><span class="p">],</span> <span class="n">method_drifts</span><span class="p">])</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">method_order</span><span class="p">:</span>
|
||||
<span class="n">method_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">method</span><span class="p">)</span>
|
||||
|
||||
<span class="k">return</span> <span class="n">data</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,606 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.protocol — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.protocol</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.protocol</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">from</span> <span class="nn">copy</span> <span class="kn">import</span> <span class="n">deepcopy</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
|
||||
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
<span class="kn">from</span> <span class="nn">os.path</span> <span class="kn">import</span> <span class="n">exists</span>
|
||||
<span class="kn">from</span> <span class="nn">glob</span> <span class="kn">import</span> <span class="n">glob</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="AbstractProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol">[docs]</a><span class="k">class</span> <span class="nc">AbstractProtocol</span><span class="p">(</span><span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Abstract parent class for sample generation protocols.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implements the protocol. Yields one sample at a time along with its prevalence</span>
|
||||
|
||||
<span class="sd"> :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances</span>
|
||||
<span class="sd"> and in which `prev` is an `np.ndarray` with the class prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span>
|
||||
|
||||
<div class="viewcode-block" id="AbstractProtocol.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractProtocol.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Indicates the total number of samples that the protocol generates.</span>
|
||||
|
||||
<span class="sd"> :return: The number of samples to generate if known, or `None` otherwise.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="kc">None</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="IterateProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol">[docs]</a><span class="k">class</span> <span class="nc">IterateProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A very simple protocol which simply iterates over a list of previously generated samples</span>
|
||||
|
||||
<span class="sd"> :param samples: a list of :class:`quapy.data.base.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">samples</span><span class="p">:</span> <span class="p">[</span><span class="n">LabelledCollection</span><span class="p">]):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">samples</span> <span class="o">=</span> <span class="n">samples</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Yields one sample from the initial list at a time</span>
|
||||
|
||||
<span class="sd"> :return: yields a tuple `(sample, prev)` at a time, where `sample` is a set of instances</span>
|
||||
<span class="sd"> and in which `prev` is an `np.ndarray` with the class prevalence values</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">:</span>
|
||||
<span class="k">yield</span> <span class="n">sample</span><span class="o">.</span><span class="n">Xp</span>
|
||||
|
||||
<div class="viewcode-block" id="IterateProtocol.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.IterateProtocol.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of samples in this protocol</span>
|
||||
|
||||
<span class="sd"> :return: int</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">samples</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol">[docs]</a><span class="k">class</span> <span class="nc">AbstractStochasticSeededProtocol</span><span class="p">(</span><span class="n">AbstractProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,</span>
|
||||
<span class="sd"> via random sampling), sequences of :class:`quapy.data.base.LabelledCollection` samples.</span>
|
||||
<span class="sd"> The protocol abstraction enforces</span>
|
||||
<span class="sd"> the object to be instantiated using a seed, so that the sequence can be fully replicated.</span>
|
||||
<span class="sd"> In order to make this functionality possible, the classes extending this abstraction need to</span>
|
||||
<span class="sd"> implement only two functions, :meth:`samples_parameters` which generates all the parameters</span>
|
||||
<span class="sd"> needed for extracting the samples, and :meth:`sample` that, given some parameters as input,</span>
|
||||
<span class="sd"> deterministically generates a sample.</span>
|
||||
|
||||
<span class="sd"> :param random_state: the seed that allows any sequence of samples to be replicated. Default is 0, meaning that</span>
|
||||
<span class="sd"> the sequence will be consistent every time the protocol is called.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">_random_state</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span> <span class="c1"># means "not set"</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
|
||||
<span class="nd">@property</span>
|
||||
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span>
|
||||
|
||||
<span class="nd">@random_state</span><span class="o">.</span><span class="n">setter</span>
|
||||
<span class="k">def</span> <span class="nf">random_state</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">_random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
|
||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.samples_parameters">[docs]</a> <span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This function has to return all the necessary parameters to replicate the samples</span>
|
||||
|
||||
<span class="sd"> :return: a list of parameters, each of which serves to deterministically generate a sample</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.sample">[docs]</a> <span class="nd">@abstractmethod</span>
|
||||
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Extract one sample determined by the given parameters</span>
|
||||
|
||||
<span class="sd"> :param params: all the necessary parameters to generate a sample</span>
|
||||
<span class="sd"> :return: one sample (the same sample has to be generated for the same parameters)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="o">...</span></div>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Yields one sample at a time. The type of object returned depends on the `collator` function. The</span>
|
||||
<span class="sd"> default behaviour returns tuples of the form `(sample, prevalence)`.</span>
|
||||
|
||||
<span class="sd"> :return: a tuple `(sample, prevalence)` if return_type='sample_prev', or an instance of</span>
|
||||
<span class="sd"> :class:`qp.data.LabelledCollection` if return_type='labelled_collection'</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'The random seed has never been initialized. '</span>
|
||||
<span class="s1">'Set it to None not to impose replicability.'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">random_state</span><span class="p">))</span>
|
||||
<span class="k">for</span> <span class="n">params</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">samples_parameters</span><span class="p">():</span>
|
||||
<span class="k">yield</span> <span class="bp">self</span><span class="o">.</span><span class="n">collator</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">params</span><span class="p">))</span>
|
||||
|
||||
<div class="viewcode-block" id="AbstractStochasticSeededProtocol.collator"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.AbstractStochasticSeededProtocol.collator">[docs]</a> <span class="k">def</span> <span class="nf">collator</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sample</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> The collator prepares the sample to accommodate the desired output format before returning the output.</span>
|
||||
<span class="sd"> This collator simply returns the sample as it is. Classes inheriting from this abstract class can</span>
|
||||
<span class="sd"> implement their custom collators.</span>
|
||||
|
||||
<span class="sd"> :param sample: the sample to be returned</span>
|
||||
<span class="sd"> :param args: additional arguments</span>
|
||||
<span class="sd"> :return: the sample adhering to a desired output format (in this case, the sample is returned as it is)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">sample</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol">[docs]</a><span class="k">class</span> <span class="nc">OnLabelledCollectionProtocol</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Protocols that generate samples from a :class:`qp.data.LabelledCollection` object.</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">RETURN_TYPES</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'sample_prev'</span><span class="p">,</span> <span class="s1">'labelled_collection'</span><span class="p">,</span> <span class="s1">'index'</span><span class="p">]</span>
|
||||
|
||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_labelled_collection"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_labelled_collection">[docs]</a> <span class="k">def</span> <span class="nf">get_labelled_collection</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the labelled collection on which this protocol acts.</span>
|
||||
|
||||
<span class="sd"> :return: an object of type :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span></div>
|
||||
|
||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.on_preclassified_instances"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.on_preclassified_instances">[docs]</a> <span class="k">def</span> <span class="nf">on_preclassified_instances</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a copy of this protocol that acts on a modified version of the original</span>
|
||||
<span class="sd"> :class:`qp.data.LabelledCollection` in which the original instances have been replaced</span>
|
||||
<span class="sd"> with the outputs of a classifier for each instance. (This is convenient for speeding-up</span>
|
||||
<span class="sd"> the evaluation procedures for many samples, by pre-classifying the instances in advance.)</span>
|
||||
|
||||
<span class="sd"> :param pre_classifications: the predictions issued by a classifier, typically an array-like</span>
|
||||
<span class="sd"> with shape `(n_instances,)` when the classifier is a hard one, or with shape</span>
|
||||
<span class="sd"> `(n_instances, n_classes)` when the classifier is a probabilistic one.</span>
|
||||
<span class="sd"> :param in_place: whether to apply the modification in-place or in a new copy (default).</span>
|
||||
<span class="sd"> :return: this protocol itself if `in_place=True`, or a modified copy of it otherwise</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'error: the pre-classified data has different shape '</span> \
|
||||
<span class="sa">f</span><span class="s1">'(expected </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="si">}</span><span class="s1">, found </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">)</span><span class="si">}</span><span class="s1">)'</span>
|
||||
<span class="k">if</span> <span class="n">in_place</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">pre_classifications</span>
|
||||
<span class="k">return</span> <span class="bp">self</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">new</span> <span class="o">=</span> <span class="n">deepcopy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">new</span><span class="o">.</span><span class="n">on_preclassified_instances</span><span class="p">(</span><span class="n">pre_classifications</span><span class="p">,</span> <span class="n">in_place</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="OnLabelledCollectionProtocol.get_collator"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.OnLabelledCollectionProtocol.get_collator">[docs]</a> <span class="nd">@classmethod</span>
|
||||
<span class="k">def</span> <span class="nf">get_collator</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns a collator function, i.e., a function that prepares the yielded data</span>
|
||||
|
||||
<span class="sd"> :param return_type: either 'sample_prev' (default) if the collator is requested to yield tuples of</span>
|
||||
<span class="sd"> `(sample, prevalence)`, or 'labelled_collection' when it is requested to yield instances of</span>
|
||||
<span class="sd"> :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> :return: the collator function (a callable function that takes as input an instance of</span>
|
||||
<span class="sd"> :class:`qp.data.LabelledCollection`)</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">assert</span> <span class="n">return_type</span> <span class="ow">in</span> <span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'unknown return type passed as argument; valid ones are </span><span class="si">{</span><span class="bp">cls</span><span class="o">.</span><span class="n">RETURN_TYPES</span><span class="si">}</span><span class="s1">'</span>
|
||||
<span class="k">if</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">'sample_prev'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span><span class="o">.</span><span class="n">Xp</span>
|
||||
<span class="k">elif</span> <span class="n">return_type</span><span class="o">==</span><span class="s1">'labelled_collection'</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="k">lambda</span> <span class="n">lc</span><span class="p">:</span><span class="n">lc</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="APP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP">[docs]</a><span class="k">class</span> <span class="nc">APP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Implementation of the artificial prevalence protocol (APP).</span>
|
||||
<span class="sd"> The APP consists of exploring a grid of prevalence values containing `n_prevalences` points (e.g.,</span>
|
||||
<span class="sd"> [0, 0.05, 0.1, 0.15, ..., 1], if `n_prevalences=21`), and generating all valid combinations of</span>
|
||||
<span class="sd"> prevalence values for all classes (e.g., for 3 classes, samples with [0, 0, 1], [0, 0.05, 0.95], ...,</span>
|
||||
<span class="sd"> [1, 0, 0] prevalence values of size `sample_size` will be yielded). The number of samples for each valid</span>
|
||||
<span class="sd"> combination of prevalence values is indicated by `repeats`.</span>
|
||||
|
||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
||||
<span class="sd"> :param sample_size: integer, number of instances in each sample; if None (default) then it is taken from</span>
|
||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
||||
<span class="sd"> :param n_prevalences: the number of equidistant prevalence points to extract from the [0,1] interval for the</span>
|
||||
<span class="sd"> grid (default is 21)</span>
|
||||
<span class="sd"> :param repeats: number of copies for each valid prevalence vector (default is 10)</span>
|
||||
<span class="sd"> :param smooth_limits_epsilon: the quantity to add and subtract to the limits 0 and 1</span>
|
||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
||||
<span class="sd"> :param sanity_check: int, raises an exception warning the user that the number of examples to be generated exceeds</span>
|
||||
<span class="sd"> this number; set to None for skipping this check</span>
|
||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">21</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
|
||||
<span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="mi">10000</span><span class="p">,</span> <span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">APP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span> <span class="o">=</span> <span class="n">n_prevalences</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span> <span class="o">=</span> <span class="n">smooth_limits_epsilon</span>
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="p">((</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sanity_check</span><span class="o">></span><span class="mi">0</span><span class="p">)</span> <span class="ow">or</span> <span class="n">sanity_check</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">'param "sanity_check" must either be None or a positive integer'</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sanity_check</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="n">n</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="o">=</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">n</span> <span class="o">></span> <span class="n">sanity_check</span><span class="p">:</span>
|
||||
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
|
||||
<span class="sa">f</span><span class="s2">"Abort: the number of samples that will be generated by </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2"> (</span><span class="si">{</span><span class="n">n</span><span class="si">}</span><span class="s2">) "</span>
|
||||
<span class="sa">f</span><span class="s2">"exceeds the maximum number of allowed samples (</span><span class="si">{</span><span class="n">sanity_check</span><span class="w"> </span><span class="si">= }</span><span class="s2">). Set 'sanity_check' to "</span>
|
||||
<span class="sa">f</span><span class="s2">"None, or to a higher number, for bypassing this check."</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="APP.prevalence_grid"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.prevalence_grid">[docs]</a> <span class="k">def</span> <span class="nf">prevalence_grid</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates vectors of prevalence values from an exhaustive grid of prevalence values. The</span>
|
||||
<span class="sd"> number of prevalence values explored for each dimension depends on `n_prevalences`, so that, if, for example,</span>
|
||||
<span class="sd"> `n_prevalences=11` then the prevalence values of the grid are taken from [0, 0.1, 0.2, ..., 0.9, 1]. Only</span>
|
||||
<span class="sd"> valid prevalence distributions are returned, i.e., vectors of prevalence values that sum up to 1. For each</span>
|
||||
<span class="sd"> valid vector of prevalence values, `repeats` copies are returned. The vector of prevalence values is</span>
|
||||
<span class="sd"> implicit, meaning that the last dimension (which is constrained</span>
|
||||
<span class="sd"> to 1 - sum of the rest) is not returned (note that, quite obviously, the vector therefore does not sum up to</span>
|
||||
<span class="sd"> 1). Note that this method is deterministic, i.e., there is no random sampling anywhere.</span>
|
||||
|
||||
<span class="sd"> :return: a `np.ndarray` of shape `(n, dimensions-1)`, where `dimensions` is the number of classes</span>
|
||||
<span class="sd"> and `n` is the number of valid combinations found</span>
|
||||
<span class="sd"> in the grid multiplied by `repeats`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">dimensions</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">prevalence_linspace</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">smooth_limits_epsilon</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">smooth_limits_epsilon</span><span class="p">)</span>
|
||||
<span class="n">eps</span> <span class="o">=</span> <span class="p">(</span><span class="n">s</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">/</span><span class="mi">2</span> <span class="c1"># handling floating rounding</span>
|
||||
<span class="n">s</span> <span class="o">=</span> <span class="p">[</span><span class="n">s</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="n">dimensions</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[</span><span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">itertools</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="o">*</span><span class="n">s</span><span class="p">,</span> <span class="n">repeat</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> <span class="k">if</span> <span class="p">(</span><span class="nb">sum</span><span class="p">(</span><span class="n">p</span><span class="p">)</span> <span class="o"><</span> <span class="p">(</span><span class="mf">1.</span><span class="o">+</span><span class="n">eps</span><span class="p">))]</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">prevs</span><span class="p">),</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">prevs</span></div>
|
||||
|
||||
<div class="viewcode-block" id="APP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return all the necessary parameters to replicate the samples according to the APP protocol.</span>
|
||||
|
||||
<span class="sd"> :return: a list of indexes that realize the APP sampling</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">prevalence_grid</span><span class="p">():</span>
|
||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
|
||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
||||
|
||||
<div class="viewcode-block" id="APP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
||||
|
||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="APP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.APP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of samples that will be generated</span>
|
||||
|
||||
<span class="sd"> :return: int</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n_prevalences</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="NPP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP">[docs]</a><span class="k">class</span> <span class="nc">NPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A generator of samples that implements the natural prevalence protocol (NPP). The NPP consists of drawing</span>
|
||||
<span class="sd"> samples uniformly at random, therefore approximately preserving the natural prevalence of the collection.</span>
|
||||
|
||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
||||
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
|
||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span><span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">NPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="NPP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the NPP protocol.</span>
|
||||
|
||||
<span class="sd"> :return: a list of indexes that realize the NPP sampling</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">uniform_sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
||||
|
||||
<div class="viewcode-block" id="NPP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
||||
|
||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="NPP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.NPP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of samples that will be generated (equals to "repeats")</span>
|
||||
|
||||
<span class="sd"> :return: int</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="UPP"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP">[docs]</a><span class="k">class</span> <span class="nc">UPP</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">,</span> <span class="n">OnLabelledCollectionProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A variant of :class:`APP` that, instead of using a grid of equidistant prevalence values,</span>
|
||||
<span class="sd"> relies on the Kraemer algorithm for sampling unit (k-1)-simplex uniformly at random, with</span>
|
||||
<span class="sd"> k the number of classes. This protocol covers the entire range of prevalence values in a</span>
|
||||
<span class="sd"> statistical sense, i.e., unlike APP there is no guarantee that it is covered precisely</span>
|
||||
<span class="sd"> equally for all classes, but it is preferred in cases in which the number of possible</span>
|
||||
<span class="sd"> combinations of the grid values of APP makes this endeavour intractable.</span>
|
||||
|
||||
<span class="sd"> :param data: a `LabelledCollection` from which the samples will be drawn</span>
|
||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
||||
<span class="sd"> :param repeats: the number of samples to generate. Default is 100.</span>
|
||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
||||
<span class="sd"> :param return_type: set to "sample_prev" (default) to get the pairs of (sample, prevalence) at each iteration, or</span>
|
||||
<span class="sd"> to "labelled_collection" to get instead instances of LabelledCollection</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">UPP</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">data</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="UPP.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the UPP protocol.</span>
|
||||
|
||||
<span class="sd"> :return: a list of indexes that realize the UPP sampling</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexes</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">prevs</span> <span class="ow">in</span> <span class="n">F</span><span class="o">.</span><span class="n">uniform_simplex_sampling</span><span class="p">(</span><span class="n">n_classes</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
||||
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="p">,</span> <span class="o">*</span><span class="n">prevs</span><span class="p">)</span>
|
||||
<span class="n">indexes</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">indexes</span></div>
|
||||
|
||||
<div class="viewcode-block" id="UPP.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Realizes the sample given the index of the instances.</span>
|
||||
|
||||
<span class="sd"> :param index: indexes of the instances to select</span>
|
||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span></div>
|
||||
|
||||
<div class="viewcode-block" id="UPP.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.UPP.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of samples that will be generated (equals to "repeats")</span>
|
||||
|
||||
<span class="sd"> :return: int</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span></div></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="DomainMixer"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer">[docs]</a><span class="k">class</span> <span class="nc">DomainMixer</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Generates mixtures of two domains (A and B) at controlled rates, but preserving the original class prevalence.</span>
|
||||
|
||||
<span class="sd"> :param domainA: one domain, an object of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> :param domainB: another domain, an object of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> :param sample_size: integer, the number of instances in each sample; if None (default) then it is taken from</span>
|
||||
<span class="sd"> qp.environ["SAMPLE_SIZE"]. If this is not set, a ValueError exception is raised.</span>
|
||||
<span class="sd"> :param repeats: int, number of samples to draw for every mixture rate</span>
|
||||
<span class="sd"> :param prevalence: the prevalence to preserv along the mixtures. If specified, should be an array containing</span>
|
||||
<span class="sd"> one prevalence value (positive float) for each class and summing up to one. If not specified, the prevalence</span>
|
||||
<span class="sd"> will be taken from the domain A (default).</span>
|
||||
<span class="sd"> :param mixture_points: an integer indicating the number of points to take from a linear scale (e.g., 21 will</span>
|
||||
<span class="sd"> generate the mixture points [1, 0.95, 0.9, ..., 0]), or the array of mixture values itself.</span>
|
||||
<span class="sd"> the specific points</span>
|
||||
<span class="sd"> :param random_state: allows replicating samples across runs (default 0, meaning that the sequence of samples</span>
|
||||
<span class="sd"> will be the same every time the protocol is called)</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
|
||||
<span class="bp">self</span><span class="p">,</span>
|
||||
<span class="n">domainA</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
|
||||
<span class="n">domainB</span><span class="p">:</span> <span class="n">LabelledCollection</span><span class="p">,</span>
|
||||
<span class="n">sample_size</span><span class="p">,</span>
|
||||
<span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
|
||||
<span class="n">prevalence</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
|
||||
<span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span>
|
||||
<span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
|
||||
<span class="n">return_type</span><span class="o">=</span><span class="s1">'sample_prev'</span><span class="p">):</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">DomainMixer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">A</span> <span class="o">=</span> <span class="n">domainA</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">B</span> <span class="o">=</span> <span class="n">domainB</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">_get_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">=</span> <span class="n">repeats</span>
|
||||
<span class="k">if</span> <span class="n">prevalence</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">domainA</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevalence</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span> <span class="o">==</span> <span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> \
|
||||
<span class="sa">f</span><span class="s1">'wrong shape for the vector prevalence (expected </span><span class="si">{</span><span class="n">domainA</span><span class="o">.</span><span class="n">n_classes</span><span class="si">}</span><span class="s1">)'</span>
|
||||
<span class="k">assert</span> <span class="n">F</span><span class="o">.</span><span class="n">check_prevalence_vector</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">),</span> \
|
||||
<span class="sa">f</span><span class="s1">'the prevalence vector is not valid (either it contains values outside [0,1] or does not sum up to 1)'</span>
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">mixture_points</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">mixture_points</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">logical_and</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span> <span class="o">>=</span> <span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="o"><=</span><span class="mi">1</span><span class="p">)),</span> \
|
||||
<span class="s1">'mixture_model datatype not understood (expected int or a sequence of real values in [0,1])'</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">random_state</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">collator</span> <span class="o">=</span> <span class="n">OnLabelledCollectionProtocol</span><span class="o">.</span><span class="n">get_collator</span><span class="p">(</span><span class="n">return_type</span><span class="p">)</span>
|
||||
|
||||
<div class="viewcode-block" id="DomainMixer.samples_parameters"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.samples_parameters">[docs]</a> <span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Return all the necessary parameters to replicate the samples as according to the this protocol.</span>
|
||||
|
||||
<span class="sd"> :return: a list of zipped indexes (from A and B) that realize the sampling</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="p">[],</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">propA</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">:</span>
|
||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">repeats</span><span class="p">):</span>
|
||||
<span class="n">nA</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span> <span class="o">*</span> <span class="n">propA</span><span class="p">))</span>
|
||||
<span class="n">nB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">sample_size</span><span class="o">-</span><span class="n">nA</span>
|
||||
<span class="n">sampleAidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nA</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
|
||||
<span class="n">sampleBidx</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_index</span><span class="p">(</span><span class="n">nB</span><span class="p">,</span> <span class="o">*</span><span class="bp">self</span><span class="o">.</span><span class="n">prevalence</span><span class="p">)</span>
|
||||
<span class="n">indexesA</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleAidx</span><span class="p">)</span>
|
||||
<span class="n">indexesB</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sampleBidx</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">zip</span><span class="p">(</span><span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span><span class="p">))</span></div>
|
||||
|
||||
<div class="viewcode-block" id="DomainMixer.sample"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">indexes</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Realizes the sample given a pair of indexes of the instances from A and B.</span>
|
||||
|
||||
<span class="sd"> :param indexes: indexes of the instances to select from A and B</span>
|
||||
<span class="sd"> :return: an instance of :class:`qp.data.LabelledCollection`</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">indexesA</span><span class="p">,</span> <span class="n">indexesB</span> <span class="o">=</span> <span class="n">indexes</span>
|
||||
<span class="n">sampleA</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">A</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesA</span><span class="p">)</span>
|
||||
<span class="n">sampleB</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">B</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">indexesB</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">sampleA</span><span class="o">+</span><span class="n">sampleB</span></div>
|
||||
|
||||
<div class="viewcode-block" id="DomainMixer.total"><a class="viewcode-back" href="../../quapy.html#quapy.protocol.DomainMixer.total">[docs]</a> <span class="k">def</span> <span class="nf">total</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Returns the number of samples that will be generated (equals to "repeats * mixture_points")</span>
|
||||
|
||||
<span class="sd"> :return: int</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">repeats</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">mixture_points</span><span class="p">)</span></div></div>
|
||||
|
||||
|
||||
<span class="c1"># aliases</span>
|
||||
|
||||
<span class="n">ArtificialPrevalenceProtocol</span> <span class="o">=</span> <span class="n">APP</span>
|
||||
<span class="n">NaturalPrevalenceProtocol</span> <span class="o">=</span> <span class="n">NPP</span>
|
||||
<span class="n">UniformPrevalenceProtocol</span> <span class="o">=</span> <span class="n">UPP</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_base — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_base</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_base</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
|
||||
|
||||
<div class="viewcode-block" id="test_import">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_base.test_import">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_import</span><span class="p">():</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">__version__</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,178 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_datasets — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_datasets</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_datasets</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">pytest</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.data.datasets</span> <span class="kn">import</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span><span class="p">,</span> \
|
||||
<span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">,</span>\
|
||||
<span class="n">fetch_reviews</span><span class="p">,</span> <span class="n">fetch_twitter</span><span class="p">,</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">,</span> <span class="n">fetch_lequa2022</span><span class="p">,</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_fetch_reviews">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_reviews">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">REVIEWS_SENTIMENT_DATASETS</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_fetch_twitter">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_twitter">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TEST</span> <span class="o">+</span> <span class="n">TWITTER_SENTIMENT_DATASETS_TRAIN</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">ValueError</span> <span class="k">as</span> <span class="n">ve</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'semeval'</span> <span class="ow">and</span> <span class="n">ve</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
|
||||
<span class="s1">'dataset "semeval" can only be used for model selection.'</span><span class="p">):</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_twitter</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">,</span> <span class="n">for_model_selection</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_fetch_UCIDataset">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIDataset">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">UCI_BINARY_DATASETS</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_fetch_UCIDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
||||
<span class="k">except</span> <span class="ne">FileNotFoundError</span> <span class="k">as</span> <span class="n">fnfe</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">dataset_name</span> <span class="o">==</span> <span class="s1">'pageblocks.5'</span> <span class="ow">and</span> <span class="n">fnfe</span><span class="o">.</span><span class="n">args</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">find</span><span class="p">(</span>
|
||||
<span class="s1">'If this is the first time you attempt to load this dataset'</span><span class="p">)</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'The pageblocks.5 dataset requires some hand processing to be usable, skipping this test.'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_fetch_UCIMultiDataset">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_UCIMultiDataset">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">UCI_MULTICLASS_DATASETS</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_fetch_UCIMultiDataset</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">fetch_UCIMulticlassLabelledCollection</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'Dataset </span><span class="si">{</span><span class="n">dataset_name</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Training set stats'</span><span class="p">)</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">stats</span><span class="p">()</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test set stats'</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_fetch_lequa2022">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_datasets.test_fetch_lequa2022">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset_name'</span><span class="p">,</span> <span class="n">LEQUA2022_TASKS</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">):</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">gen_val</span><span class="p">,</span> <span class="n">gen_test</span> <span class="o">=</span> <span class="n">fetch_lequa2022</span><span class="p">(</span><span class="n">dataset_name</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">train</span><span class="o">.</span><span class="n">stats</span><span class="p">())</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Val:'</span><span class="p">,</span> <span class="n">gen_val</span><span class="o">.</span><span class="n">total</span><span class="p">())</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'Test:'</span><span class="p">,</span> <span class="n">gen_test</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_evaluation — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_evaluation</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_evaluation</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.error</span> <span class="kn">import</span> <span class="n">QUANTIFICATION_ERROR_SINGLE</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR</span><span class="p">,</span> <span class="n">QUANTIFICATION_ERROR_NAMES</span><span class="p">,</span> \
|
||||
<span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">EMQ</span><span class="p">,</span> <span class="n">PCC</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BaseQuantifier</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EvalTestCase">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">EvalTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="EvalTestCase.test_eval_speedup">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_eval_speedup">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_eval_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'hp'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
|
||||
|
||||
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">predict_proba</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">):</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">predict_proba</span><span class="p">(</span><span class="n">X</span><span class="p">)</span>
|
||||
|
||||
<span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">aggr_speedup</span><span class="o">=</span><span class="s1">'force'</span><span class="p">)</span>
|
||||
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'evaluation (with optimization) took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">class</span> <span class="nc">NonAggregativeEMQ</span><span class="p">(</span><span class="n">BaseQuantifier</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span> <span class="o">=</span> <span class="n">EMQ</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">quantify</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instances</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">emq</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span>
|
||||
|
||||
<span class="n">emq</span> <span class="o">=</span> <span class="n">NonAggregativeEMQ</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">emq</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">tend_no_optim</span> <span class="o">=</span> <span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'evaluation (w/o optimization) took </span><span class="si">{</span><span class="n">tend_no_optim</span><span class="si">}</span><span class="s1">s [MAE=</span><span class="si">{</span><span class="n">score</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">]'</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_no_optim</span><span class="o">></span><span class="p">(</span><span class="n">tend_optim</span><span class="o">/</span><span class="mi">2</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EvalTestCase.test_evaluation_output">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_evaluation.EvalTestCase.test_evaluation_output">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_evaluation_output</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'hp'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">test</span>
|
||||
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span><span class="o">=</span><span class="mi">100</span>
|
||||
|
||||
<span class="n">protocol</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">protocol</span><span class="o">.</span><span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PCC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||
|
||||
<span class="n">single_errors</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">QUANTIFICATION_ERROR_SINGLE_NAMES</span><span class="p">)</span>
|
||||
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">'m'</span><span class="o">+</span><span class="n">e</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
|
||||
<span class="n">single_errors</span> <span class="o">=</span> <span class="n">single_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">single_errors</span><span class="p">]</span>
|
||||
<span class="n">averaged_errors</span> <span class="o">=</span> <span class="n">averaged_errors</span> <span class="o">+</span> <span class="p">[</span><span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">from_name</span><span class="p">(</span><span class="n">e</span><span class="p">)</span> <span class="k">for</span> <span class="n">e</span> <span class="ow">in</span> <span class="n">averaged_errors</span><span class="p">]</span>
|
||||
<span class="k">for</span> <span class="n">error_metric</span><span class="p">,</span> <span class="n">averaged_error_metric</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">single_errors</span><span class="p">,</span> <span class="n">averaged_errors</span><span class="p">):</span>
|
||||
<span class="n">score</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">averaged_error_metric</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">score</span><span class="p">,</span> <span class="nb">float</span><span class="p">))</span>
|
||||
|
||||
<span class="n">scores</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">evaluate</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">protocol</span><span class="p">,</span> <span class="n">error_metric</span><span class="o">=</span><span class="n">error_metric</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">scores</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">))</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">scores</span><span class="o">.</span><span class="n">mean</span><span class="p">(),</span> <span class="n">score</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_hierarchy — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_hierarchy</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_hierarchy</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="o">*</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="HierarchyTestCase">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">HierarchyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
|
||||
<div class="viewcode-block" id="HierarchyTestCase.test_aggregative">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_aggregative">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_aggregative</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="HierarchyTestCase.test_binary">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_binary">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_binary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">HDy</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="HierarchyTestCase.test_probabilistic">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_hierarchy.HierarchyTestCase.test_probabilistic">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_probabilistic</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">()</span>
|
||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">CC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">ACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
|
||||
<span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="p">[</span><span class="n">PCC</span><span class="p">(</span><span class="n">lr</span><span class="p">),</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)]:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeCrispQuantifier</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">AggregativeSoftQuantifier</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,176 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_labelcollection — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_labelcollection</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_labelcollection</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">scipy.sparse</span> <span class="kn">import</span> <span class="n">csr_matrix</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelCollectionTestCase">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">LabelCollectionTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
<div class="viewcode-block" id="LabelCollectionTestCase.test_split">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_split">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_split</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">100</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">tr</span><span class="p">,</span> <span class="n">te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_random</span><span class="p">(</span><span class="mf">0.7</span><span class="p">)</span>
|
||||
<span class="n">check_prev</span> <span class="o">=</span> <span class="n">tr</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.7</span> <span class="o">+</span> <span class="n">te</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span><span class="o">*</span><span class="mf">0.3</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="p">),</span> <span class="mi">70</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">te</span><span class="p">),</span> <span class="mi">30</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">allclose</span><span class="p">(</span><span class="n">check_prev</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span> <span class="kc">True</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">tr</span><span class="o">+</span><span class="n">te</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="LabelCollectionTestCase.test_join">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_labelcollection.LabelCollectionTestCase.test_join">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_join</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">50</span><span class="p">)</span>
|
||||
<span class="n">data1</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">200</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
|
||||
<span class="n">data2</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
|
||||
<span class="n">data3</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data1</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data2</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data3</span><span class="p">))</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">all</span><span class="p">(</span><span class="n">combined</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">6</span><span class="p">)),</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">data4</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data1</span><span class="p">,</span> <span class="n">data2</span><span class="p">,</span> <span class="n">data3</span><span class="p">,</span> <span class="n">data4</span><span class="p">)</span>
|
||||
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">20</span><span class="p">)</span>
|
||||
<span class="n">data5</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span><span class="o">+</span><span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span>
|
||||
|
||||
<span class="n">x</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">rand</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">data6</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">,</span> <span class="n">data6</span><span class="p">)</span>
|
||||
|
||||
<span class="n">data4</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data4</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
||||
<span class="n">data5</span><span class="o">.</span><span class="n">instances</span> <span class="o">=</span> <span class="n">csr_matrix</span><span class="p">(</span><span class="n">data5</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">combined</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">LabelledCollection</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">data4</span><span class="p">,</span> <span class="n">data5</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">combined</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">data4</span><span class="p">)</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">data5</span><span class="p">))</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,357 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_methods — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_methods</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_methods</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">import</span> <span class="nn">pytest</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">LinearSVC</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">method.aggregative</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.base</span> <span class="kn">import</span> <span class="n">BinaryQuantifier</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method</span> <span class="kn">import</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">Ensemble</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">DMy</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">MedianEstimator</span>
|
||||
|
||||
<span class="c1"># datasets = [pytest.param(qp.datasets.fetch_twitter('hcr', pickle=True), id='hcr'),</span>
|
||||
<span class="c1"># pytest.param(qp.datasets.fetch_UCIDataset('ionosphere'), id='ionosphere')]</span>
|
||||
|
||||
<span class="n">tinydatasets</span> <span class="o">=</span> <span class="p">[</span><span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_twitter</span><span class="p">(</span><span class="s1">'hcr'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">'tiny_hcr'</span><span class="p">),</span>
|
||||
<span class="n">pytest</span><span class="o">.</span><span class="n">param</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">'ionosphere'</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(),</span> <span class="nb">id</span><span class="o">=</span><span class="s1">'tiny_ionosphere'</span><span class="p">)]</span>
|
||||
|
||||
<span class="n">learners</span> <span class="o">=</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">,</span> <span class="n">LinearSVC</span><span class="p">]</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_aggregative_methods">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_aggregative_methods">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'aggregative_method'</span><span class="p">,</span> <span class="n">AGGREGATIVE_METHODS</span><span class="p">)</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'learner'</span><span class="p">,</span> <span class="n">learners</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">aggregative_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">):</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">aggregative_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">model</span><span class="p">)</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_non_aggregative_methods">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_non_aggregative_methods">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'non_aggregative_method'</span><span class="p">,</span> <span class="n">NON_AGGREGATIVE_METHODS</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_non_aggregative_methods</span><span class="p">(</span><span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">non_aggregative_method</span><span class="p">):</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">non_aggregative_method</span><span class="p">()</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_ensemble_method">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_ensemble_method">[docs]</a>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'base_method'</span><span class="p">,</span> <span class="p">[</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">ACC</span><span class="p">,</span> <span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">PACC</span><span class="p">])</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'learner'</span><span class="p">,</span> <span class="p">[</span><span class="n">LogisticRegression</span><span class="p">])</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'dataset'</span><span class="p">,</span> <span class="n">tinydatasets</span><span class="p">)</span>
|
||||
<span class="nd">@pytest</span><span class="o">.</span><span class="n">mark</span><span class="o">.</span><span class="n">parametrize</span><span class="p">(</span><span class="s1">'policy'</span><span class="p">,</span> <span class="n">Ensemble</span><span class="o">.</span><span class="n">VALID_POLICIES</span><span class="p">)</span>
|
||||
<span class="k">def</span> <span class="nf">test_ensemble_method</span><span class="p">(</span><span class="n">base_method</span><span class="p">,</span> <span class="n">learner</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">Dataset</span><span class="p">,</span> <span class="n">policy</span><span class="p">):</span>
|
||||
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">20</span>
|
||||
|
||||
<span class="n">base_quantifier</span><span class="o">=</span><span class="n">base_method</span><span class="p">(</span><span class="n">learner</span><span class="p">())</span>
|
||||
|
||||
<span class="k">if</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span> <span class="ow">and</span> <span class="n">policy</span><span class="o">==</span><span class="s1">'ds'</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary policy ds on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">Ensemble</span><span class="p">(</span><span class="n">quantifier</span><span class="o">=</span><span class="n">base_quantifier</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">policy</span><span class="o">=</span><span class="n">policy</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_quanet_method">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_quanet_method">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_quanet_method</span><span class="p">():</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.classification.neural</span>
|
||||
<span class="k">except</span> <span class="ne">ModuleNotFoundError</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'skipping QuaNet test due to missing torch package'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
||||
|
||||
<span class="c1"># load the kindle dataset as text, and convert words to numerical indexes</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'kindle'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">reduce</span><span class="p">(</span><span class="mi">200</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">CNNnet</span>
|
||||
<span class="n">cnn</span> <span class="o">=</span> <span class="n">CNNnet</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">vocabulary_size</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">n_classes</span><span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.classification.neural</span> <span class="kn">import</span> <span class="n">NeuralClassifierTrainer</span>
|
||||
<span class="n">learner</span> <span class="o">=</span> <span class="n">NeuralClassifierTrainer</span><span class="p">(</span><span class="n">cnn</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
||||
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.meta</span> <span class="kn">import</span> <span class="n">QuaNet</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">QuaNet</span><span class="p">(</span><span class="n">learner</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s1">'cuda'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">BinaryQuantifier</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">dataset</span><span class="o">.</span><span class="n">binary</span><span class="p">:</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'skipping the test of binary model </span><span class="si">{</span><span class="n">model</span><span class="si">}</span><span class="s1"> on non-binary dataset </span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="k">return</span>
|
||||
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">estim_prevalences</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_str_label_names">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_str_label_names">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_str_label_names</span><span class="p">():</span>
|
||||
<span class="n">model</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">method</span><span class="o">.</span><span class="n">aggregative</span><span class="o">.</span><span class="n">CC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">pickle</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="o">*</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()),</span>
|
||||
<span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">))</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">preprocessing</span><span class="o">.</span><span class="n">text2tfidf</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">int_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">int_estim_prevalences</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
|
||||
|
||||
<span class="n">dataset_str</span> <span class="o">=</span> <span class="n">Dataset</span><span class="p">(</span><span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
|
||||
<span class="p">[</span><span class="s1">'one'</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">'zero'</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">labels</span><span class="p">]),</span>
|
||||
<span class="n">LabelledCollection</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">,</span>
|
||||
<span class="p">[</span><span class="s1">'one'</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="s1">'zero'</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">labels</span><span class="p">]))</span>
|
||||
<span class="k">assert</span> <span class="nb">all</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="s1">'wrong indexation'</span>
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">training</span><span class="p">)</span>
|
||||
|
||||
<span class="n">str_estim_prevalences</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">)</span>
|
||||
<span class="n">true_prevalences</span> <span class="o">=</span> <span class="n">dataset_str</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">prevalence</span><span class="p">()</span>
|
||||
|
||||
<span class="n">error</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">,</span> <span class="n">str_estim_prevalences</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="nb">type</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> <span class="o">==</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">true_prevalences</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">str_estim_prevalences</span><span class="p">)</span>
|
||||
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">int_estim_prevalences</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
|
||||
<span class="n">str_estim_prevalences</span><span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">model</span><span class="o">.</span><span class="n">classes_</span><span class="p">)</span><span class="o">.</span><span class="n">index</span><span class="p">(</span><span class="s1">'one'</span><span class="p">)])</span></div>
|
||||
|
||||
|
||||
<span class="c1"># helper</span>
|
||||
<span class="k">def</span> <span class="nf">__fit_test</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">):</span>
|
||||
<span class="n">quantifier</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">)</span>
|
||||
<span class="n">test_samples</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">test</span><span class="p">)</span>
|
||||
<span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">prediction</span><span class="p">(</span><span class="n">quantifier</span><span class="p">,</span> <span class="n">test_samples</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">qp</span><span class="o">.</span><span class="n">error</span><span class="o">.</span><span class="n">mae</span><span class="p">(</span><span class="n">true_prevs</span><span class="p">,</span> <span class="n">estim_prevs</span><span class="p">),</span> <span class="n">estim_prevs</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_median_meta">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_median_meta</span><span class="p">():</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This test compares the performance of the MedianQuantifier with respect to computing the median of the predictions</span>
|
||||
<span class="sd"> of a differently parameterized quantifier. We use the DistributionMatching base quantifier and the median is</span>
|
||||
<span class="sd"> computed across different values of nbins</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
||||
|
||||
<span class="c1"># grid of values</span>
|
||||
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">11</span><span class="p">))</span>
|
||||
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">'kindle'</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="n">errors</span> <span class="o">=</span> <span class="p">[]</span>
|
||||
<span class="k">for</span> <span class="n">nbins</span> <span class="ow">in</span> <span class="n">nbins_grid</span><span class="p">:</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">nbins</span><span class="o">=</span><span class="n">nbins</span><span class="p">)</span>
|
||||
<span class="n">mae</span><span class="p">,</span> <span class="n">estim_prevs</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
||||
<span class="n">prevs</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">estim_prevs</span><span class="p">)</span>
|
||||
<span class="n">errors</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">mae</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">dataset</span><span class="si">}</span><span class="s1"> DistributionMatching(nbins=</span><span class="si">{</span><span class="n">nbins</span><span class="si">}</span><span class="s1">) got MAE </span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
<span class="n">prevs</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">prevs</span><span class="p">)</span>
|
||||
<span class="n">mae</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">errors</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">median_mae</span><span class="p">,</span> <span class="n">prev</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">testing</span><span class="o">.</span><span class="n">assert_almost_equal</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">median</span><span class="p">(</span><span class="n">prevs</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">),</span> <span class="n">prev</span><span class="p">)</span>
|
||||
<span class="k">assert</span> <span class="n">median_mae</span> <span class="o"><</span> <span class="n">mae</span><span class="p">,</span> <span class="s1">'the median-based quantifier provided a higher error...'</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="test_median_meta_modsel">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_methods.test_median_meta_modsel">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_median_meta_modsel</span><span class="p">():</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> This test checks the median-meta quantifier with model selection</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">100</span>
|
||||
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="s1">'kindle'</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">val</span> <span class="o">=</span> <span class="n">train</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
|
||||
<span class="n">nbins_grid</span> <span class="o">=</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">15</span><span class="p">]</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">median_mae</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">median_mae</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">DMy</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">())</span>
|
||||
<span class="n">lr_params</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">)}</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">MedianEstimator</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="p">{</span><span class="s1">'nbins'</span><span class="p">:</span> <span class="n">nbins_grid</span><span class="p">},</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="o">=</span><span class="n">lr_params</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">APP</span><span class="p">(</span><span class="n">val</span><span class="p">),</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">optimized_median_ave</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">__fit_test</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">train</span><span class="p">,</span> <span class="n">test</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'</span><span class="se">\t</span><span class="s1">MAE=</span><span class="si">{</span><span class="n">optimized_median_ave</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">assert</span> <span class="n">optimized_median_ave</span> <span class="o"><</span> <span class="n">median_mae</span><span class="p">,</span> <span class="s2">"the optimized method yielded worse performance..."</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_modsel — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_modsel</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_modsel</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.svm</span> <span class="kn">import</span> <span class="n">SVC</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.model_selection</span> <span class="kn">import</span> <span class="n">GridSearchQ</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ModselTestCase">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">ModselTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
|
||||
<div class="viewcode-block" id="ModselTestCase.test_modsel">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_modsel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best params'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best score'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_modsel_parallel</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="c1"># test = data.test</span>
|
||||
|
||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best params'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'best score'</span><span class="p">,</span> <span class="n">q</span><span class="o">.</span><span class="n">best_score_</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_params_</span><span class="p">[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">q</span><span class="o">.</span><span class="n">best_model</span><span class="p">()</span><span class="o">.</span><span class="n">get_params</span><span class="p">()[</span><span class="s1">'classifier__C'</span><span class="p">],</span> <span class="mf">10.0</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_parallel_speedup">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_parallel_speedup">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_modsel_parallel_speedup</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">5000</span><span class="p">))</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">)}</span>
|
||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">GridSearchQ</span><span class="p">(</span>
|
||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="n">tend_nooptim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span><span class="o">-</span><span class="n">tinit</span>
|
||||
|
||||
<span class="n">tinit</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span>
|
||||
<span class="n">GridSearchQ</span><span class="p">(</span>
|
||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
||||
<span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span>
|
||||
<span class="n">tend_optim</span> <span class="o">=</span> <span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">-</span> <span class="n">tinit</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'parallel training took </span><span class="si">{</span><span class="n">tend_optim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s'</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s1">'sequential training took </span><span class="si">{</span><span class="n">tend_nooptim</span><span class="si">:</span><span class="s1">.4f</span><span class="si">}</span><span class="s1">s'</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">tend_optim</span> <span class="o"><</span> <span class="p">(</span><span class="mf">0.5</span><span class="o">*</span><span class="n">tend_nooptim</span><span class="p">),</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="ModselTestCase.test_modsel_timeout">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_modsel.ModselTestCase.test_modsel_timeout">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_modsel_timeout</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="k">class</span> <span class="nc">SlowLR</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
|
||||
<span class="kn">import</span> <span class="nn">time</span>
|
||||
<span class="n">time</span><span class="o">.</span><span class="n">sleep</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
|
||||
<span class="nb">super</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">sample_weight</span><span class="p">)</span>
|
||||
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">SlowLR</span><span class="p">())</span>
|
||||
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_reviews</span><span class="p">(</span><span class="s1">'imdb'</span><span class="p">,</span> <span class="n">tfidf</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
||||
<span class="n">training</span><span class="p">,</span> <span class="n">validation</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">training</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="c1"># test = data.test</span>
|
||||
|
||||
<span class="n">param_grid</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'classifier__C'</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">logspace</span><span class="p">(</span><span class="o">-</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">7</span><span class="p">)}</span>
|
||||
<span class="n">app</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">validation</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
<span class="n">q</span> <span class="o">=</span> <span class="n">GridSearchQ</span><span class="p">(</span>
|
||||
<span class="n">q</span><span class="p">,</span> <span class="n">param_grid</span><span class="p">,</span> <span class="n">protocol</span><span class="o">=</span><span class="n">app</span><span class="p">,</span> <span class="n">error</span><span class="o">=</span><span class="s1">'mae'</span><span class="p">,</span> <span class="n">refit</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=-</span><span class="mi">1</span><span class="p">,</span> <span class="n">verbose</span><span class="o">=</span><span class="kc">True</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">TimeoutError</span><span class="p">):</span>
|
||||
<span class="n">q</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">training</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,336 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_protocols — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_protocols</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_protocols</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.protocol</span> <span class="kn">import</span> <span class="n">APP</span><span class="p">,</span> <span class="n">NPP</span><span class="p">,</span> <span class="n">UPP</span><span class="p">,</span> <span class="n">DomainMixer</span><span class="p">,</span> <span class="n">AbstractStochasticSeededProtocol</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="mock_labelled_collection">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.mock_labelled_collection">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">mock_labelled_collection</span><span class="p">(</span><span class="n">prefix</span><span class="o">=</span><span class="s1">''</span><span class="p">):</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span> <span class="o">+</span> <span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">*</span> <span class="mi">250</span>
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="p">[</span><span class="n">prefix</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">i</span><span class="p">)</span> <span class="o">+</span> <span class="s1">'-'</span> <span class="o">+</span> <span class="nb">str</span><span class="p">(</span><span class="n">yi</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">yi</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">y</span><span class="p">)]</span>
|
||||
<span class="k">return</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">X</span><span class="p">,</span> <span class="n">y</span><span class="p">,</span> <span class="n">classes</span><span class="o">=</span><span class="nb">sorted</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">y</span><span class="p">)))</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="samples_to_str">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.samples_to_str">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">samples_to_str</span><span class="p">(</span><span class="n">protocol</span><span class="p">):</span>
|
||||
<span class="n">samples_str</span> <span class="o">=</span> <span class="s2">""</span>
|
||||
<span class="k">for</span> <span class="n">instances</span><span class="p">,</span> <span class="n">prev</span> <span class="ow">in</span> <span class="n">protocol</span><span class="p">():</span>
|
||||
<span class="n">samples_str</span> <span class="o">+=</span> <span class="sa">f</span><span class="s1">'</span><span class="si">{</span><span class="n">instances</span><span class="si">}</span><span class="se">\t</span><span class="si">{</span><span class="n">prev</span><span class="si">}</span><span class="se">\n</span><span class="s1">'</span>
|
||||
<span class="k">return</span> <span class="n">samples_str</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">TestProtocols</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_app_sanity_check">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_sanity_check">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_app_sanity_check</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">n_prevpoints</span> <span class="o">=</span> <span class="mi">101</span>
|
||||
<span class="n">repeats</span> <span class="o">=</span> <span class="mi">10</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
<span class="n">n_combinations</span> <span class="o">=</span> \
|
||||
<span class="n">quapy</span><span class="o">.</span><span class="n">functional</span><span class="o">.</span><span class="n">num_prevalence_combinations</span><span class="p">(</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">n_classes</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">n_classes</span><span class="p">,</span> <span class="n">n_repeats</span><span class="o">=</span><span class="n">repeats</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="n">n_combinations</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="n">n_prevpoints</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">,</span> <span class="n">sanity_check</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_app_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_app_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_app_not_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_not_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_app_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_app_number">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_app_number">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_app_number</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">APP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">n_prevalences</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="c1"># surprisingly enough, for some n_prevalences the test fails, notwithstanding</span>
|
||||
<span class="c1"># everything is correct. The problem is that in function APP.prevalence_grid()</span>
|
||||
<span class="c1"># there is sometimes one rounding error that gets cumulated and</span>
|
||||
<span class="c1"># surpasses 1.0 (by a very small float value, 0.0000000000002 or sthe like)</span>
|
||||
<span class="c1"># so these tuples are mistakenly removed... I have tried with np.close, and</span>
|
||||
<span class="c1"># other workarounds, but eventually happens that there is some negative probability</span>
|
||||
<span class="c1"># in the sampling function...</span>
|
||||
|
||||
<span class="n">count</span> <span class="o">=</span> <span class="mi">0</span>
|
||||
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
|
||||
<span class="n">count</span><span class="o">+=</span><span class="mi">1</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">count</span><span class="p">,</span> <span class="n">p</span><span class="o">.</span><span class="n">total</span><span class="p">())</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_npp_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_npp_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_npp_not_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_npp_not_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_npp_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_kraemer_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_kraemer_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">42</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_kraemer_not_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_kraemer_not_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_kraemer_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">UPP</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">repeats</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_covariate_shift_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domA'</span><span class="p">)</span>
|
||||
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domB'</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">)</span> <span class="c1"># <- random_state is by default set to 0</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_covariate_shift_not_replicate">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_covariate_shift_not_replicate">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_covariate_shift_not_replicate</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="n">dataA</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domA'</span><span class="p">)</span>
|
||||
<span class="n">dataB</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">(</span><span class="s1">'domB'</span><span class="p">)</span>
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">DomainMixer</span><span class="p">(</span><span class="n">dataA</span><span class="p">,</span> <span class="n">dataB</span><span class="p">,</span> <span class="n">sample_size</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="n">mixture_points</span><span class="o">=</span><span class="mi">11</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
|
||||
|
||||
<span class="n">samples1</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
<span class="n">samples2</span> <span class="o">=</span> <span class="n">samples_to_str</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">samples1</span><span class="p">,</span> <span class="n">samples2</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="TestProtocols.test_no_seed_init">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_protocols.TestProtocols.test_no_seed_init">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_no_seed_init</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="k">class</span> <span class="nc">NoSeedInit</span><span class="p">(</span><span class="n">AbstractStochasticSeededProtocol</span><span class="p">):</span>
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">data</span> <span class="o">=</span> <span class="n">mock_labelled_collection</span><span class="p">()</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">samples_parameters</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
<span class="c1"># return a matrix containing sampling indexes in the rows</span>
|
||||
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="p">),</span> <span class="mi">10</span><span class="o">*</span><span class="mi">10</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">10</span><span class="p">)</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">sample</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
|
||||
<span class="n">index</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">unique</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">sampling_from_index</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
|
||||
|
||||
<span class="n">p</span> <span class="o">=</span> <span class="n">NoSeedInit</span><span class="p">()</span>
|
||||
|
||||
<span class="c1"># this should raise a ValueError, since the class is said to be AbstractStochasticSeededProtocol but the</span>
|
||||
<span class="c1"># random_seed has never been passed to super(NoSeedInit, self).__init__(random_seed)</span>
|
||||
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">assertRaises</span><span class="p">(</span><span class="ne">ValueError</span><span class="p">):</span>
|
||||
<span class="k">for</span> <span class="n">sample</span> <span class="ow">in</span> <span class="n">p</span><span class="p">():</span>
|
||||
<span class="k">pass</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'done'</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,225 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" data-content_root="../../../">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.tests.test_replicability — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=92fd9be5" />
|
||||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=19f00094" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||||
<script src="../../../_static/documentation_options.js?v=22607128"></script>
|
||||
<script src="../../../_static/doctools.js?v=9a2dae69"></script>
|
||||
<script src="../../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
||||
<script src="../../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.tests.test_replicability</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.tests.test_replicability</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">unittest</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.data</span> <span class="kn">import</span> <span class="n">LabelledCollection</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.functional</span> <span class="kn">import</span> <span class="n">strprev</span>
|
||||
<span class="kn">from</span> <span class="nn">sklearn.linear_model</span> <span class="kn">import</span> <span class="n">LogisticRegression</span>
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">quapy.method.aggregative</span> <span class="kn">import</span> <span class="n">PACC</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy.functional</span> <span class="k">as</span> <span class="nn">F</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MyTestCase">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase">[docs]</a>
|
||||
<span class="k">class</span> <span class="nc">MyTestCase</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
|
||||
|
||||
<div class="viewcode-block" id="MyTestCase.test_prediction_replicability">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_prediction_replicability">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_prediction_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">dataset</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIBinaryDataset</span><span class="p">(</span><span class="s1">'yeast'</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
|
||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
|
||||
<span class="n">prev</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">str_prev1</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">lr</span> <span class="o">=</span> <span class="n">LogisticRegression</span><span class="p">(</span><span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">max_iter</span><span class="o">=</span><span class="mi">10000</span><span class="p">)</span>
|
||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">lr</span><span class="p">)</span>
|
||||
<span class="n">prev2</span> <span class="o">=</span> <span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">training</span><span class="p">)</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">test</span><span class="o">.</span><span class="n">X</span><span class="p">)</span>
|
||||
<span class="n">str_prev2</span> <span class="o">=</span> <span class="n">strprev</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prec</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">str_prev1</span><span class="p">,</span> <span class="n">str_prev2</span><span class="p">)</span> <span class="c1"># add assertion here</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MyTestCase.test_samping_replicability">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_samping_replicability">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_samping_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="k">def</span> <span class="nf">equal_collections</span><span class="p">(</span><span class="n">c1</span><span class="p">,</span> <span class="n">c2</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">Xtr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">Xtr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">ytr</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">ytr</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">value</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">c1</span><span class="o">.</span><span class="n">classes_</span> <span class="o">==</span> <span class="n">c2</span><span class="o">.</span><span class="n">classes_</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
|
||||
|
||||
<span class="n">X</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="nb">range</span><span class="p">(</span><span class="mi">100</span><span class="p">)))</span>
|
||||
<span class="n">y</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randint</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span>
|
||||
<span class="n">data</span> <span class="o">=</span> <span class="n">LabelledCollection</span><span class="p">(</span><span class="n">instances</span><span class="o">=</span><span class="n">X</span><span class="p">,</span> <span class="n">labels</span><span class="o">=</span><span class="n">y</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">False</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">])</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample1</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">sample2</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">50</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.7</span><span class="p">,</span> <span class="mf">0.3</span><span class="p">],</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1</span><span class="p">,</span> <span class="n">sample2</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample1_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">sample2_tr</span><span class="p">,</span> <span class="n">sample2_te</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">split_stratified</span><span class="p">(</span><span class="n">train_prop</span><span class="o">=</span><span class="mf">0.7</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_tr</span><span class="p">,</span> <span class="n">sample2_tr</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">equal_collections</span><span class="p">(</span><span class="n">sample1_te</span><span class="p">,</span> <span class="n">sample2_te</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
|
||||
<div class="viewcode-block" id="MyTestCase.test_parallel_replicability">
|
||||
<a class="viewcode-back" href="../../../quapy.tests.html#quapy.tests.test_replicability.MyTestCase.test_parallel_replicability">[docs]</a>
|
||||
<span class="k">def</span> <span class="nf">test_parallel_replicability</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
|
||||
|
||||
<span class="n">train</span><span class="p">,</span> <span class="n">test</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">datasets</span><span class="o">.</span><span class="n">fetch_UCIMulticlassDataset</span><span class="p">(</span><span class="s1">'dry-bean'</span><span class="p">)</span><span class="o">.</span><span class="n">train_test</span>
|
||||
|
||||
<span class="n">test</span> <span class="o">=</span> <span class="n">test</span><span class="o">.</span><span class="n">sampling</span><span class="p">(</span><span class="mi">500</span><span class="p">,</span> <span class="o">*</span><span class="p">[</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">])</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">10</span><span class="p">):</span>
|
||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
||||
<span class="n">prev1</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
||||
<span class="n">prev2</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
||||
|
||||
<span class="k">with</span> <span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">):</span>
|
||||
<span class="n">pacc</span> <span class="o">=</span> <span class="n">PACC</span><span class="p">(</span><span class="n">LogisticRegression</span><span class="p">(),</span> <span class="n">val_split</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">n_jobs</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
|
||||
<span class="n">pacc</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">val_split</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
|
||||
<span class="n">prev3</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">strprev</span><span class="p">(</span><span class="n">pacc</span><span class="o">.</span><span class="n">quantify</span><span class="p">(</span><span class="n">test</span><span class="o">.</span><span class="n">instances</span><span class="p">))</span>
|
||||
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev1</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev2</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="n">prev3</span><span class="p">)</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span><span class="n">prev1</span><span class="p">,</span> <span class="n">prev2</span><span class="p">)</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">prev2</span><span class="p">,</span> <span class="n">prev3</span><span class="p">)</span></div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span>
|
||||
<span class="n">unittest</span><span class="o">.</span><span class="n">main</span><span class="p">()</span>
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -1,402 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>quapy.util — QuaPy: A Python-based open-source framework for quantification 0.1.8 documentation</title>
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
|
||||
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
|
||||
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/sphinx_highlight.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
|
||||
|
||||
|
||||
<a href="../../index.html" class="icon icon-home">
|
||||
QuaPy: A Python-based open-source framework for quantification
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../modules.html">quapy</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">QuaPy: A Python-based open-source framework for quantification</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||||
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
||||
<li class="breadcrumb-item active">quapy.util</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<h1>Source code for quapy.util</h1><div class="highlight"><pre>
|
||||
<span></span><span class="kn">import</span> <span class="nn">contextlib</span>
|
||||
<span class="kn">import</span> <span class="nn">itertools</span>
|
||||
<span class="kn">import</span> <span class="nn">multiprocessing</span>
|
||||
<span class="kn">import</span> <span class="nn">os</span>
|
||||
<span class="kn">import</span> <span class="nn">pickle</span>
|
||||
<span class="kn">import</span> <span class="nn">urllib</span>
|
||||
<span class="kn">from</span> <span class="nn">pathlib</span> <span class="kn">import</span> <span class="n">Path</span>
|
||||
<span class="kn">from</span> <span class="nn">contextlib</span> <span class="kn">import</span> <span class="n">ExitStack</span>
|
||||
<span class="kn">import</span> <span class="nn">quapy</span> <span class="k">as</span> <span class="nn">qp</span>
|
||||
|
||||
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
|
||||
<span class="kn">from</span> <span class="nn">joblib</span> <span class="kn">import</span> <span class="n">Parallel</span><span class="p">,</span> <span class="n">delayed</span>
|
||||
<span class="kn">from</span> <span class="nn">time</span> <span class="kn">import</span> <span class="n">time</span>
|
||||
<span class="kn">import</span> <span class="nn">signal</span>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_get_parallel_slices</span><span class="p">(</span><span class="n">n_tasks</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">n_jobs</span> <span class="o">==</span> <span class="o">-</span><span class="mi">1</span><span class="p">:</span>
|
||||
<span class="n">n_jobs</span> <span class="o">=</span> <span class="n">multiprocessing</span><span class="o">.</span><span class="n">cpu_count</span><span class="p">()</span>
|
||||
<span class="n">batch</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">n_tasks</span> <span class="o">/</span> <span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="n">remainder</span> <span class="o">=</span> <span class="n">n_tasks</span> <span class="o">%</span> <span class="n">n_jobs</span>
|
||||
<span class="k">return</span> <span class="p">[</span><span class="nb">slice</span><span class="p">(</span><span class="n">job</span> <span class="o">*</span> <span class="n">batch</span><span class="p">,</span> <span class="p">(</span><span class="n">job</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="n">batch</span> <span class="o">+</span> <span class="p">(</span><span class="n">remainder</span> <span class="k">if</span> <span class="n">job</span> <span class="o">==</span> <span class="n">n_jobs</span> <span class="o">-</span> <span class="mi">1</span> <span class="k">else</span> <span class="mi">0</span><span class="p">))</span> <span class="k">for</span> <span class="n">job</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n_jobs</span><span class="p">)]</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="map_parallel"><a class="viewcode-back" href="../../quapy.html#quapy.util.map_parallel">[docs]</a><span class="k">def</span> <span class="nf">map_parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Applies func to n_jobs slices of args. E.g., if args is an array of 99 items and n_jobs=2, then</span>
|
||||
<span class="sd"> func is applied in two parallel processes to args[0:50] and to args[50:99]. func is a function</span>
|
||||
<span class="sd"> that already works with a list of arguments.</span>
|
||||
|
||||
<span class="sd"> :param func: function to be parallelized</span>
|
||||
<span class="sd"> :param args: array-like of arguments to be passed to the function in different parallel calls</span>
|
||||
<span class="sd"> :param n_jobs: the number of workers</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">args</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="n">slices</span> <span class="o">=</span> <span class="n">_get_parallel_slices</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">args</span><span class="p">),</span> <span class="n">n_jobs</span><span class="p">)</span>
|
||||
<span class="n">results</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">)(</span>
|
||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func</span><span class="p">)(</span><span class="n">args</span><span class="p">[</span><span class="n">slice_i</span><span class="p">])</span> <span class="k">for</span> <span class="n">slice_i</span> <span class="ow">in</span> <span class="n">slices</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="o">.</span><span class="n">from_iterable</span><span class="p">(</span><span class="n">results</span><span class="p">))</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="parallel"><a class="viewcode-back" href="../../quapy.html#quapy.util.parallel">[docs]</a><span class="k">def</span> <span class="nf">parallel</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="p">,</span> <span class="n">n_jobs</span><span class="p">,</span> <span class="n">seed</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">asarray</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="s1">'loky'</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A wrapper of multiprocessing:</span>
|
||||
|
||||
<span class="sd"> >>> Parallel(n_jobs=n_jobs)(</span>
|
||||
<span class="sd"> >>> delayed(func)(args_i) for args_i in args</span>
|
||||
<span class="sd"> >>> )</span>
|
||||
|
||||
<span class="sd"> that takes the `quapy.environ` variable as input silently.</span>
|
||||
<span class="sd"> Seeds the child processes to ensure reproducibility when n_jobs>1.</span>
|
||||
|
||||
<span class="sd"> :param func: callable</span>
|
||||
<span class="sd"> :param args: args of func</span>
|
||||
<span class="sd"> :param seed: the numeric seed</span>
|
||||
<span class="sd"> :param asarray: set to True to return a np.ndarray instead of a list</span>
|
||||
<span class="sd"> :param backend: indicates the backend used for handling parallel works</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">func_dec</span><span class="p">(</span><span class="n">environ</span><span class="p">,</span> <span class="n">seed</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span> <span class="o">=</span> <span class="n">environ</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'N_JOBS'</span><span class="p">]</span> <span class="o">=</span> <span class="mi">1</span>
|
||||
<span class="c1">#set a context with a temporal seed to ensure results are reproducibles in parallel</span>
|
||||
<span class="k">with</span> <span class="n">ExitStack</span><span class="p">()</span> <span class="k">as</span> <span class="n">stack</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">stack</span><span class="o">.</span><span class="n">enter_context</span><span class="p">(</span><span class="n">qp</span><span class="o">.</span><span class="n">util</span><span class="o">.</span><span class="n">temp_seed</span><span class="p">(</span><span class="n">seed</span><span class="p">))</span>
|
||||
<span class="k">return</span> <span class="n">func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
||||
|
||||
<span class="n">out</span> <span class="o">=</span> <span class="n">Parallel</span><span class="p">(</span><span class="n">n_jobs</span><span class="o">=</span><span class="n">n_jobs</span><span class="p">,</span> <span class="n">backend</span><span class="o">=</span><span class="n">backend</span><span class="p">)(</span>
|
||||
<span class="n">delayed</span><span class="p">(</span><span class="n">func_dec</span><span class="p">)(</span><span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">,</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">seed</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">seed</span><span class="o">+</span><span class="n">i</span><span class="p">,</span> <span class="n">args_i</span><span class="p">)</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">args_i</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="p">)</span>
|
||||
<span class="k">if</span> <span class="n">asarray</span><span class="p">:</span>
|
||||
<span class="n">out</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">(</span><span class="n">out</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">out</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="temp_seed"><a class="viewcode-back" href="../../quapy.html#quapy.util.temp_seed">[docs]</a><span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
|
||||
<span class="k">def</span> <span class="nf">temp_seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Can be used in a "with" context to set a temporal seed without modifying the outer numpy's current state. E.g.:</span>
|
||||
|
||||
<span class="sd"> >>> with temp_seed(random_seed):</span>
|
||||
<span class="sd"> >>> pass # do any computation depending on np.random functionality</span>
|
||||
|
||||
<span class="sd"> :param random_state: the seed to set within the "with" context</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">state</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">get_state</span><span class="p">()</span>
|
||||
<span class="c1">#save the seed just in case is needed (for instance for setting the seed to child processes)</span>
|
||||
<span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'_R_SEED'</span><span class="p">]</span> <span class="o">=</span> <span class="n">random_state</span>
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="n">random_state</span><span class="p">)</span>
|
||||
<span class="k">try</span><span class="p">:</span>
|
||||
<span class="k">yield</span>
|
||||
<span class="k">finally</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">random_state</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">set_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="download_file"><a class="viewcode-back" href="../../quapy.html#quapy.util.download_file">[docs]</a><span class="k">def</span> <span class="nf">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Downloads a file from a url</span>
|
||||
|
||||
<span class="sd"> :param url: the url</span>
|
||||
<span class="sd"> :param archive_filename: destination filename</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">def</span> <span class="nf">progress</span><span class="p">(</span><span class="n">blocknum</span><span class="p">,</span> <span class="n">bs</span><span class="p">,</span> <span class="n">size</span><span class="p">):</span>
|
||||
<span class="n">total_sz_mb</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%.2f</span><span class="s1"> MB'</span> <span class="o">%</span> <span class="p">(</span><span class="n">size</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
|
||||
<span class="n">current_sz_mb</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%.2f</span><span class="s1"> MB'</span> <span class="o">%</span> <span class="p">((</span><span class="n">blocknum</span> <span class="o">*</span> <span class="n">bs</span><span class="p">)</span> <span class="o">/</span> <span class="mf">1e6</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s1">'</span><span class="se">\r</span><span class="s1">downloaded </span><span class="si">%s</span><span class="s1"> / </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">current_sz_mb</span><span class="p">,</span> <span class="n">total_sz_mb</span><span class="p">),</span> <span class="n">end</span><span class="o">=</span><span class="s1">''</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">"Downloading </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="n">url</span><span class="p">)</span>
|
||||
<span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlretrieve</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">filename</span><span class="o">=</span><span class="n">archive_filename</span><span class="p">,</span> <span class="n">reporthook</span><span class="o">=</span><span class="n">progress</span><span class="p">)</span>
|
||||
<span class="nb">print</span><span class="p">(</span><span class="s2">""</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="download_file_if_not_exists"><a class="viewcode-back" href="../../quapy.html#quapy.util.download_file_if_not_exists">[docs]</a><span class="k">def</span> <span class="nf">download_file_if_not_exists</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Downloads a file (using :meth:`download_file`) if the file does not exist.</span>
|
||||
|
||||
<span class="sd"> :param url: the url</span>
|
||||
<span class="sd"> :param archive_filename: destination filename</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">):</span>
|
||||
<span class="k">return</span>
|
||||
<span class="n">create_if_not_exist</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">archive_filename</span><span class="p">))</span>
|
||||
<span class="n">download_file</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">archive_filename</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="create_if_not_exist"><a class="viewcode-back" href="../../quapy.html#quapy.util.create_if_not_exist">[docs]</a><span class="k">def</span> <span class="nf">create_if_not_exist</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> An alias to `os.makedirs(path, exist_ok=True)` that also returns the path. This is useful in cases like, e.g.:</span>
|
||||
|
||||
<span class="sd"> >>> path = create_if_not_exist(os.path.join(dir, subdir, anotherdir))</span>
|
||||
|
||||
<span class="sd"> :param path: path to create</span>
|
||||
<span class="sd"> :return: the path itself</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">path</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="get_quapy_home"><a class="viewcode-back" href="../../quapy.html#quapy.util.get_quapy_home">[docs]</a><span class="k">def</span> <span class="nf">get_quapy_home</span><span class="p">():</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Gets the home directory of QuaPy, i.e., the directory where QuaPy saves permanent data, such as downloaded datasets.</span>
|
||||
<span class="sd"> This directory is `~/quapy_data`</span>
|
||||
|
||||
<span class="sd"> :return: a string representing the path</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">home</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="o">.</span><span class="n">home</span><span class="p">()),</span> <span class="s1">'quapy_data'</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">home</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">home</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="create_parent_dir"><a class="viewcode-back" href="../../quapy.html#quapy.util.create_parent_dir">[docs]</a><span class="k">def</span> <span class="nf">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Creates the parent dir (if any) of a given path, if it does not exist yet. E.g., for `./path/to/file.txt`, the path `./path/to`</span>
|
||||
<span class="sd"> is created.</span>
|
||||
|
||||
<span class="sd"> :param path: the path</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">parentdir</span> <span class="o">=</span> <span class="n">Path</span><span class="p">(</span><span class="n">path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span>
|
||||
<span class="k">if</span> <span class="n">parentdir</span><span class="p">:</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">parentdir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="save_text_file"><a class="viewcode-back" href="../../quapy.html#quapy.util.save_text_file">[docs]</a><span class="k">def</span> <span class="nf">save_text_file</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">text</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Saves a text file to disk, given its full path, and creates the parent directory if missing.</span>
|
||||
|
||||
<span class="sd"> :param path: path of the file to write.</span>
|
||||
<span class="sd"> :param text: text to save.</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="n">create_parent_dir</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
||||
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s1">'wt'</span><span class="p">)</span> <span class="k">as</span> <span class="n">fout</span><span class="p">:</span>
|
||||
<span class="n">fout</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">text</span><span class="p">)</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="pickled_resource"><a class="viewcode-back" href="../../quapy.html#quapy.util.pickled_resource">[docs]</a><span class="k">def</span> <span class="nf">pickled_resource</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">:</span><span class="nb">str</span><span class="p">,</span> <span class="n">generation_func</span><span class="p">:</span><span class="n">callable</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Allows for fast reuse of resources that are generated only once by calling generation_func(\\*args). The next times</span>
|
||||
<span class="sd"> this function is invoked, it loads the pickled resource. Example:</span>
|
||||
|
||||
<span class="sd"> >>> def some_array(n): # a mock resource created with one parameter (`n`)</span>
|
||||
<span class="sd"> >>> return np.random.rand(n)</span>
|
||||
<span class="sd"> >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource does not exist: it is created by calling some_array(10)</span>
|
||||
<span class="sd"> >>> pickled_resource('./my_array.pkl', some_array, 10) # the resource exists; it is loaded from './my_array.pkl'</span>
|
||||
|
||||
<span class="sd"> :param pickle_path: the path where to save (first time) and load (next times) the resource</span>
|
||||
<span class="sd"> :param generation_func: the function that generates the resource, in case it does not exist in pickle_path</span>
|
||||
<span class="sd"> :param args: any arg that generation_func uses for generating the resources</span>
|
||||
<span class="sd"> :return: the resource</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">pickle_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">return</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">):</span>
|
||||
<span class="k">return</span> <span class="n">pickle</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">'rb'</span><span class="p">))</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="n">instance</span> <span class="o">=</span> <span class="n">generation_func</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span>
|
||||
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">)</span><span class="o">.</span><span class="n">parent</span><span class="p">),</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
||||
<span class="n">pickle</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="nb">open</span><span class="p">(</span><span class="n">pickle_path</span><span class="p">,</span> <span class="s1">'wb'</span><span class="p">),</span> <span class="n">pickle</span><span class="o">.</span><span class="n">HIGHEST_PROTOCOL</span><span class="p">)</span>
|
||||
<span class="k">return</span> <span class="n">instance</span></div>
|
||||
|
||||
|
||||
<span class="k">def</span> <span class="nf">_check_sample_size</span><span class="p">(</span><span class="n">sample_size</span><span class="p">):</span>
|
||||
<span class="k">if</span> <span class="n">sample_size</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
||||
<span class="k">assert</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span> \
|
||||
<span class="s1">'error: sample_size set to None, and cannot be resolved from the environment'</span>
|
||||
<span class="n">sample_size</span> <span class="o">=</span> <span class="n">qp</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s1">'SAMPLE_SIZE'</span><span class="p">]</span>
|
||||
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sample_size</span><span class="p">,</span> <span class="nb">int</span><span class="p">)</span> <span class="ow">and</span> <span class="n">sample_size</span> <span class="o">></span> <span class="mi">0</span><span class="p">,</span> \
|
||||
<span class="s1">'error: sample_size is not a positive integer'</span>
|
||||
<span class="k">return</span> <span class="n">sample_size</span>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="EarlyStop"><a class="viewcode-back" href="../../quapy.html#quapy.util.EarlyStop">[docs]</a><span class="k">class</span> <span class="nc">EarlyStop</span><span class="p">:</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> A class implementing the early-stopping condition typically used for training neural networks.</span>
|
||||
|
||||
<span class="sd"> >>> earlystop = EarlyStop(patience=2, lower_is_better=True)</span>
|
||||
<span class="sd"> >>> earlystop(0.9, epoch=0)</span>
|
||||
<span class="sd"> >>> earlystop(0.7, epoch=1)</span>
|
||||
<span class="sd"> >>> earlystop.IMPROVED # is True</span>
|
||||
<span class="sd"> >>> earlystop(1.0, epoch=2)</span>
|
||||
<span class="sd"> >>> earlystop.STOP # is False (patience=1)</span>
|
||||
<span class="sd"> >>> earlystop(1.0, epoch=3)</span>
|
||||
<span class="sd"> >>> earlystop.STOP # is True (patience=0)</span>
|
||||
<span class="sd"> >>> earlystop.best_epoch # is 1</span>
|
||||
<span class="sd"> >>> earlystop.best_score # is 0.7</span>
|
||||
|
||||
<span class="sd"> :param patience: the number of (consecutive) times that a monitored evaluation metric (typically obtained in a</span>
|
||||
<span class="sd"> held-out validation split) can be found to be worse than the best one obtained so far, before flagging the</span>
|
||||
<span class="sd"> stopping condition. An instance of this class is `callable`, and is to be used as shown in the example above.</span>
|
||||
<span class="sd"> :param lower_is_better: if True (default) the metric is to be minimized.</span>
|
||||
<span class="sd"> :ivar best_score: keeps track of the best value seen so far</span>
|
||||
<span class="sd"> :ivar best_epoch: keeps track of the epoch in which the best score was set</span>
|
||||
<span class="sd"> :ivar STOP: flag (boolean) indicating the stopping condition</span>
|
||||
<span class="sd"> :ivar IMPROVED: flag (boolean) indicating whether there was an improvement in the last call</span>
|
||||
<span class="sd"> """</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">patience</span><span class="p">,</span> <span class="n">lower_is_better</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
|
||||
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span> <span class="o">=</span> <span class="n">patience</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">better</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">a</span><span class="p">,</span><span class="n">b</span><span class="p">:</span> <span class="n">a</span><span class="o"><</span><span class="n">b</span> <span class="k">if</span> <span class="n">lower_is_better</span> <span class="k">else</span> <span class="n">a</span><span class="o">></span><span class="n">b</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="n">patience</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="kc">None</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="kc">False</span>
|
||||
|
||||
<span class="k">def</span> <span class="fm">__call__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">watch_score</span><span class="p">,</span> <span class="n">epoch</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Commits the new score found in epoch `epoch`. If the score improves over the best score found so far, then</span>
|
||||
<span class="sd"> the patience counter gets reset. Otherwise, the patience counter is decreased and, in case it reaches 0,</span>
|
||||
<span class="sd"> the flag STOP becomes True.</span>
|
||||
|
||||
<span class="sd"> :param watch_score: the new score</span>
|
||||
<span class="sd"> :param epoch: the current epoch</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span> <span class="o">=</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="bp">self</span><span class="o">.</span><span class="n">better</span><span class="p">(</span><span class="n">watch_score</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">best_score</span><span class="p">))</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">IMPROVED</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_score</span> <span class="o">=</span> <span class="n">watch_score</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">best_epoch</span> <span class="o">=</span> <span class="n">epoch</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">PATIENCE_LIMIT</span>
|
||||
<span class="k">else</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o">-=</span> <span class="mi">1</span>
|
||||
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">patience</span> <span class="o"><=</span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="bp">self</span><span class="o">.</span><span class="n">STOP</span> <span class="o">=</span> <span class="kc">True</span></div>
|
||||
|
||||
|
||||
<div class="viewcode-block" id="timeout"><a class="viewcode-back" href="../../quapy.html#quapy.util.timeout">[docs]</a><span class="nd">@contextlib</span><span class="o">.</span><span class="n">contextmanager</span>
|
||||
<span class="k">def</span> <span class="nf">timeout</span><span class="p">(</span><span class="n">seconds</span><span class="p">):</span>
|
||||
<span class="w"> </span><span class="sd">"""</span>
|
||||
<span class="sd"> Opens a context that will raise a `TimeoutError` if not closed within the given number of seconds</span>
|
||||
|
||||
<span class="sd"> >>> def func(start_msg, end_msg):</span>
|
||||
<span class="sd"> >>> print(start_msg)</span>
|
||||
<span class="sd"> >>> sleep(2)</span>
|
||||
<span class="sd"> >>> print(end_msg)</span>
|
||||
<span class="sd"> >>></span>
|
||||
<span class="sd"> >>> with timeout(1):</span>
|
||||
<span class="sd"> >>> func('begin function', 'end function')</span>
|
||||
<span class="sd"> >>> Out[]</span>
|
||||
<span class="sd"> >>> begin function</span>
|
||||
<span class="sd"> >>> TimeoutError</span>
|
||||
|
||||
|
||||
<span class="sd"> :param seconds: number of seconds, set to <=0 to ignore the timer</span>
|
||||
<span class="sd"> """</span>
|
||||
<span class="k">if</span> <span class="n">seconds</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="k">def</span> <span class="nf">handler</span><span class="p">(</span><span class="n">signum</span><span class="p">,</span> <span class="n">frame</span><span class="p">):</span>
|
||||
<span class="k">raise</span> <span class="ne">TimeoutError</span><span class="p">()</span>
|
||||
|
||||
<span class="n">signal</span><span class="o">.</span><span class="n">signal</span><span class="p">(</span><span class="n">signal</span><span class="o">.</span><span class="n">SIGALRM</span><span class="p">,</span> <span class="n">handler</span><span class="p">)</span>
|
||||
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="n">seconds</span><span class="p">)</span>
|
||||
|
||||
<span class="k">yield</span>
|
||||
|
||||
<span class="k">if</span> <span class="n">seconds</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
||||
<span class="n">signal</span><span class="o">.</span><span class="n">alarm</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span></div>
|
||||
|
||||
</pre></div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2024, Alejandro Moreo.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -52,14 +52,6 @@ quapy.method.non\_aggregative module
|
|||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
quapy.method.composable module
|
||||
------------------------------
|
||||
|
||||
.. automodule:: quapy.method.composable
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
||||
|
||||
Module contents
|
||||
---------------
|
||||
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* _sphinx_javascript_frameworks_compat.js
|
||||
* ~~~~~~~~~~
|
||||
*
|
||||
* Compatibility shim for jQuery and underscore.js.
|
||||
*
|
||||
* WILL BE REMOVED IN Sphinx 6.0
|
||||
* xref RemovedInSphinx60Warning
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* select a different prefix for underscore: the value returned by
* `_.noConflict()` is kept in `$u` (assigned without a declaration)
|
||||
*/
|
||||
$u = _.noConflict();
|
||||
|
||||
|
||||
/**
 * Decode a URL-encoded string, treating every '+' as an encoded space.
 *
 * Falsy input (undefined, null, '') is handed back unchanged instead of
 * being decoded.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
 */
jQuery.urldecode = function(x) {
  // '+' must become a space before percent-decoding, since
  // decodeURIComponent leaves '+' alone.
  return x ? decodeURIComponent(x.replace(/\+/g, ' ')) : x;
};
|
||||
|
||||
/**
|
||||
* small helper function to urlencode strings
* (a direct alias of the built-in encodeURIComponent)
|
||||
*/
|
||||
jQuery.urlencode = encodeURIComponent;
|
||||
|
||||
/**
 * Parse the query parameters of a URL (or query string) into an object.
 *
 * Multiple values per key are supported: every value in the returned
 * object is an array, holding the decoded values in order of appearance.
 *
 * @param {string} [s] - the URL or query string to parse; when omitted,
 *     document.location.search is used. Everything up to and including
 *     the first '?' is ignored.
 * @returns {Object} map from decoded key to an array of decoded values.
 *     A part without '=' yields an undefined value, because
 *     jQuery.urldecode passes falsy input through unchanged.
 */
jQuery.getQueryParameters = function(s) {
  if (typeof s === 'undefined')
    s = document.location.search;
  var parts = s.substr(s.indexOf('?') + 1).split('&');
  var result = {};
  for (var i = 0; i < parts.length; i++) {
    var tmp = parts[i].split('=', 2);
    var key = jQuery.urldecode(tmp[0]);
    var value = jQuery.urldecode(tmp[1]);
    // Own-property check on purpose: `key in result` is also true for
    // names inherited from Object.prototype (e.g. "constructor",
    // "toString"), which made a query such as "?constructor=1" call
    // .push on a non-array and throw.
    if (Object.prototype.hasOwnProperty.call(result, key))
      result[key].push(value);
    else
      result[key] = [value];
  }
  return result;
};
|
||||
|
||||
/**
 * Highlight a given string on a jQuery object by wrapping it in
 * span elements with the given class name.
 *
 * Text nodes are searched case-insensitively on the node side only: the
 * node text is lowercased, `text` is used as given (presumably callers
 * pass an already lowercased term -- confirm against call sites).
 * Inside SVG the match is wrapped in a <tspan> and a <rect> carrying the
 * class name is queued, to be inserted before the text's parent once the
 * traversal is over.
 *
 * @param {string} text - the term to highlight
 * @param {string} className - class put on the wrapping span / SVG rect
 * @returns the value of this.each(...), so the call can be chained
 */
jQuery.fn.highlightText = function(text, className) {
  var SVG_NS = "http://www.w3.org/2000/svg";

  // Walk `node` recursively; `queued` collects the SVG rects to insert
  // after the walk.
  function highlight(node, queued) {
    if (node.nodeType !== 3) {
      // Not a text node: descend, except into form controls.
      if (!jQuery(node).is("button, select, textarea")) {
        jQuery.each(node.childNodes, function() {
          highlight(this, queued);
        });
      }
      return;
    }

    var content = node.nodeValue;
    var start = content.toLowerCase().indexOf(text);
    if (start < 0) {
      return;
    }

    // Skip text that is already highlighted or explicitly opted out.
    var holder = jQuery(node.parentNode);
    if (holder.hasClass(className) || holder.hasClass("nohighlight")) {
      return;
    }

    var inSvg = jQuery(node).closest("body, svg, foreignObject").is("svg");
    var wrapper;
    if (inSvg) {
      wrapper = document.createElementNS(SVG_NS, "tspan");
    } else {
      wrapper = document.createElement("span");
      wrapper.className = className;
    }
    wrapper.appendChild(
      document.createTextNode(content.substr(start, text.length)));

    // Split the text node in three: [head (this node)] [wrapper] [tail].
    // The tail goes in first so the wrapper can be placed in front of it.
    var tail = node.parentNode.insertBefore(
      document.createTextNode(content.substr(start + text.length)),
      node.nextSibling);
    node.parentNode.insertBefore(wrapper, tail);
    node.nodeValue = content.substr(0, start);

    if (inSvg) {
      // Build a rect covering the parent's bounding box and queue it.
      var rect = document.createElementNS(SVG_NS, "rect");
      var box = node.parentElement.getBBox();
      rect.x.baseVal.value = box.x;
      rect.y.baseVal.value = box.y;
      rect.width.baseVal.value = box.width;
      rect.height.baseVal.value = box.height;
      rect.setAttribute('class', className);
      queued.push({
        "parent": node.parentNode,
        "target": rect
      });
    }
  }

  var pending = [];
  var result = this.each(function() {
    highlight(this, pending);
  });
  for (var k = 0; k < pending.length; ++k) {
    jQuery(pending[k].parent).before(pending[k].target);
  }
  return result;
};
|
||||
|
||||
/*
 * backward compatibility for jQuery.browser
 * This will be supported until firefox bug is fixed.
 *
 * Only installed when jQuery.browser is missing: defines jQuery.uaMatch
 * and sets jQuery.browser[<detected name>] = true for the current
 * navigator.userAgent.
 */
if (!jQuery.browser) {
  /*
   * Return {browser, version} for a user-agent string. browser is ""
   * and version is "0" when nothing matches.
   */
  jQuery.uaMatch = function(ua) {
    ua = ua.toLowerCase();

    // Tried in order; the first pattern that matches wins.
    var probes = [
      /(chrome)[ \/]([\w.]+)/,
      /(webkit)[ \/]([\w.]+)/,
      /(opera)(?:.*version|)[ \/]([\w.]+)/,
      /(msie) ([\w.]+)/
    ];

    var match = null;
    for (var i = 0; i < probes.length && !match; i++) {
      match = probes[i].exec(ua);
    }
    // The mozilla probe is attempted last, and only when the string does
    // not contain "compatible".
    if (!match && ua.indexOf("compatible") < 0) {
      match = /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua);
    }
    if (!match) {
      match = [];
    }

    return {
      browser: match[ 1 ] || "",
      version: match[ 2 ] || "0"
    };
  };

  jQuery.browser = {};
  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
}
|
||||
|
|
@ -0,0 +1,925 @@
|
|||
/*
|
||||
* basic.css
|
||||
* ~~~~~~~~~
|
||||
*
|
||||
* Sphinx stylesheet -- basic theme.
|
||||
*
|
||||
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
|
||||
/* -- main layout ----------------------------------------------------------- */
|
||||
|
||||
div.clearer {
|
||||
clear: both;
|
||||
}
|
||||
|
||||
div.section::after {
|
||||
display: block;
|
||||
content: '';
|
||||
clear: left;
|
||||
}
|
||||
|
||||
/* -- relbar ---------------------------------------------------------------- */
|
||||
|
||||
div.related {
|
||||
width: 100%;
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
div.related h3 {
|
||||
display: none;
|
||||
}
|
||||
|
||||
div.related ul {
|
||||
margin: 0;
|
||||
padding: 0 0 0 10px;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
div.related li {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
div.related li.right {
|
||||
float: right;
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
/* -- sidebar --------------------------------------------------------------- */
|
||||
|
||||
div.sphinxsidebarwrapper {
|
||||
padding: 10px 5px 0 10px;
|
||||
}
|
||||
|
||||
div.sphinxsidebar {
|
||||
float: left;
|
||||
width: 230px;
|
||||
margin-left: -100%;
|
||||
font-size: 90%;
|
||||
word-wrap: break-word;
|
||||
overflow-wrap : break-word;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul ul,
|
||||
div.sphinxsidebar ul.want-points {
|
||||
margin-left: 20px;
|
||||
list-style: square;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul ul {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
div.sphinxsidebar form {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
div.sphinxsidebar input {
|
||||
border: 1px solid #98dbcc;
|
||||
font-family: sans-serif;
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox form.search {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox input[type="text"] {
|
||||
float: left;
|
||||
width: 80%;
|
||||
padding: 0.25em;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox input[type="submit"] {
|
||||
float: left;
|
||||
width: 20%;
|
||||
border-left: none;
|
||||
padding: 0.25em;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
|
||||
img {
|
||||
border: 0;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
/* -- search page ----------------------------------------------------------- */
|
||||
|
||||
ul.search {
|
||||
margin: 10px 0 0 20px;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ul.search li {
|
||||
padding: 5px 0 5px 20px;
|
||||
background-image: url(file.png);
|
||||
background-repeat: no-repeat;
|
||||
background-position: 0 7px;
|
||||
}
|
||||
|
||||
ul.search li a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
ul.search li p.context {
|
||||
color: #888;
|
||||
margin: 2px 0 0 30px;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
ul.keywordmatches li.goodmatch a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* -- index page ------------------------------------------------------------ */
|
||||
|
||||
table.contentstable {
|
||||
width: 90%;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table.contentstable p.biglink {
|
||||
line-height: 150%;
|
||||
}
|
||||
|
||||
a.biglink {
|
||||
font-size: 1.3em;
|
||||
}
|
||||
|
||||
span.linkdescr {
|
||||
font-style: italic;
|
||||
padding-top: 5px;
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
/* -- general index --------------------------------------------------------- */
|
||||
|
||||
table.indextable {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table.indextable td {
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
table.indextable ul {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
table.indextable > tbody > tr > td > ul {
|
||||
padding-left: 0em;
|
||||
}
|
||||
|
||||
table.indextable tr.pcap {
|
||||
height: 10px;
|
||||
}
|
||||
|
||||
table.indextable tr.cap {
|
||||
margin-top: 10px;
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
|
||||
img.toggler {
|
||||
margin-right: 3px;
|
||||
margin-top: 3px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
div.modindex-jumpbox {
|
||||
border-top: 1px solid #ddd;
|
||||
border-bottom: 1px solid #ddd;
|
||||
margin: 1em 0 1em 0;
|
||||
padding: 0.4em;
|
||||
}
|
||||
|
||||
div.genindex-jumpbox {
|
||||
border-top: 1px solid #ddd;
|
||||
border-bottom: 1px solid #ddd;
|
||||
margin: 1em 0 1em 0;
|
||||
padding: 0.4em;
|
||||
}
|
||||
|
||||
/* -- domain module index --------------------------------------------------- */
|
||||
|
||||
table.modindextable td {
|
||||
padding: 2px;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
/* -- general body styles --------------------------------------------------- */
|
||||
|
||||
div.body {
|
||||
min-width: 360px;
|
||||
max-width: 800px;
|
||||
}
|
||||
|
||||
div.body p, div.body dd, div.body li, div.body blockquote {
|
||||
-moz-hyphens: auto;
|
||||
-ms-hyphens: auto;
|
||||
-webkit-hyphens: auto;
|
||||
hyphens: auto;
|
||||
}
|
||||
|
||||
a.headerlink {
|
||||
visibility: hidden;
|
||||
}
|
||||
|
||||
a:visited {
|
||||
color: #551A8B;
|
||||
}
|
||||
|
||||
h1:hover > a.headerlink,
|
||||
h2:hover > a.headerlink,
|
||||
h3:hover > a.headerlink,
|
||||
h4:hover > a.headerlink,
|
||||
h5:hover > a.headerlink,
|
||||
h6:hover > a.headerlink,
|
||||
dt:hover > a.headerlink,
|
||||
caption:hover > a.headerlink,
|
||||
p.caption:hover > a.headerlink,
|
||||
div.code-block-caption:hover > a.headerlink {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
div.body p.caption {
|
||||
text-align: inherit;
|
||||
}
|
||||
|
||||
div.body td {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.first {
|
||||
margin-top: 0 !important;
|
||||
}
|
||||
|
||||
p.rubric {
|
||||
margin-top: 30px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
img.align-left, figure.align-left, .figure.align-left, object.align-left {
|
||||
clear: left;
|
||||
float: left;
|
||||
margin-right: 1em;
|
||||
}
|
||||
|
||||
img.align-right, figure.align-right, .figure.align-right, object.align-right {
|
||||
clear: right;
|
||||
float: right;
|
||||
margin-left: 1em;
|
||||
}
|
||||
|
||||
img.align-center, figure.align-center, .figure.align-center, object.align-center {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
img.align-default, figure.align-default, .figure.align-default {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
.align-left {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.align-center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.align-default {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.align-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
/* -- sidebars -------------------------------------------------------------- */
|
||||
|
||||
div.sidebar,
|
||||
aside.sidebar {
|
||||
margin: 0 0 0.5em 1em;
|
||||
border: 1px solid #ddb;
|
||||
padding: 7px;
|
||||
background-color: #ffe;
|
||||
width: 40%;
|
||||
float: right;
|
||||
clear: right;
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
p.sidebar-title {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
nav.contents,
|
||||
aside.topic,
|
||||
div.admonition, div.topic, blockquote {
|
||||
clear: left;
|
||||
}
|
||||
|
||||
/* -- topics ---------------------------------------------------------------- */
|
||||
|
||||
nav.contents,
|
||||
aside.topic,
|
||||
div.topic {
|
||||
border: 1px solid #ccc;
|
||||
padding: 7px;
|
||||
margin: 10px 0 10px 0;
|
||||
}
|
||||
|
||||
p.topic-title {
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
/* -- admonitions ----------------------------------------------------------- */
|
||||
|
||||
div.admonition {
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
padding: 7px;
|
||||
}
|
||||
|
||||
div.admonition dt {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
p.admonition-title {
|
||||
margin: 0px 10px 5px 0px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
div.body p.centered {
|
||||
text-align: center;
|
||||
margin-top: 25px;
|
||||
}
|
||||
|
||||
/* -- content of sidebars/topics/admonitions -------------------------------- */
|
||||
|
||||
div.sidebar > :last-child,
|
||||
aside.sidebar > :last-child,
|
||||
nav.contents > :last-child,
|
||||
aside.topic > :last-child,
|
||||
div.topic > :last-child,
|
||||
div.admonition > :last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
div.sidebar::after,
|
||||
aside.sidebar::after,
|
||||
nav.contents::after,
|
||||
aside.topic::after,
|
||||
div.topic::after,
|
||||
div.admonition::after,
|
||||
blockquote::after {
|
||||
display: block;
|
||||
content: '';
|
||||
clear: both;
|
||||
}
|
||||
|
||||
/* -- tables ---------------------------------------------------------------- */
|
||||
|
||||
table.docutils {
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
border: 0;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table.align-center {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table.align-default {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table caption span.caption-number {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
table caption span.caption-text {
|
||||
}
|
||||
|
||||
table.docutils td, table.docutils th {
|
||||
padding: 1px 8px 1px 5px;
|
||||
border-top: 0;
|
||||
border-left: 0;
|
||||
border-right: 0;
|
||||
border-bottom: 1px solid #aaa;
|
||||
}
|
||||
|
||||
th {
|
||||
text-align: left;
|
||||
padding-right: 5px;
|
||||
}
|
||||
|
||||
table.citation {
|
||||
border-left: solid 1px gray;
|
||||
margin-left: 1px;
|
||||
}
|
||||
|
||||
table.citation td {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
th > :first-child,
|
||||
td > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
th > :last-child,
|
||||
td > :last-child {
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
/* -- figures --------------------------------------------------------------- */
|
||||
|
||||
div.figure, figure {
|
||||
margin: 0.5em;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.figure p.caption, figcaption {
|
||||
padding: 0.3em;
|
||||
}
|
||||
|
||||
div.figure p.caption span.caption-number,
|
||||
figcaption span.caption-number {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
div.figure p.caption span.caption-text,
|
||||
figcaption span.caption-text {
|
||||
}
|
||||
|
||||
/* -- field list styles ----------------------------------------------------- */
|
||||
|
||||
table.field-list td, table.field-list th {
|
||||
border: 0 !important;
|
||||
}
|
||||
|
||||
.field-list ul {
|
||||
margin: 0;
|
||||
padding-left: 1em;
|
||||
}
|
||||
|
||||
.field-list p {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.field-name {
|
||||
-moz-hyphens: manual;
|
||||
-ms-hyphens: manual;
|
||||
-webkit-hyphens: manual;
|
||||
hyphens: manual;
|
||||
}
|
||||
|
||||
/* -- hlist styles ---------------------------------------------------------- */
|
||||
|
||||
table.hlist {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
table.hlist td {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
/* -- object description styles --------------------------------------------- */
|
||||
|
||||
.sig {
|
||||
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
|
||||
}
|
||||
|
||||
.sig-name, code.descname {
|
||||
background-color: transparent;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.sig-name {
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
code.descname {
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
.sig-prename, code.descclassname {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.optional {
|
||||
font-size: 1.3em;
|
||||
}
|
||||
|
||||
.sig-paren {
|
||||
font-size: larger;
|
||||
}
|
||||
|
||||
.sig-param.n {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* C++ specific styling */
|
||||
|
||||
.sig-inline.c-texpr,
|
||||
.sig-inline.cpp-texpr {
|
||||
font-family: unset;
|
||||
}
|
||||
|
||||
.sig.c .k, .sig.c .kt,
|
||||
.sig.cpp .k, .sig.cpp .kt {
|
||||
color: #0033B3;
|
||||
}
|
||||
|
||||
.sig.c .m,
|
||||
.sig.cpp .m {
|
||||
color: #1750EB;
|
||||
}
|
||||
|
||||
.sig.c .s, .sig.c .sc,
|
||||
.sig.cpp .s, .sig.cpp .sc {
|
||||
color: #067D17;
|
||||
}
|
||||
|
||||
|
||||
/* -- other body styles ----------------------------------------------------- */
|
||||
|
||||
ol.arabic {
|
||||
list-style: decimal;
|
||||
}
|
||||
|
||||
ol.loweralpha {
|
||||
list-style: lower-alpha;
|
||||
}
|
||||
|
||||
ol.upperalpha {
|
||||
list-style: upper-alpha;
|
||||
}
|
||||
|
||||
ol.lowerroman {
|
||||
list-style: lower-roman;
|
||||
}
|
||||
|
||||
ol.upperroman {
|
||||
list-style: upper-roman;
|
||||
}
|
||||
|
||||
:not(li) > ol > li:first-child > :first-child,
|
||||
:not(li) > ul > li:first-child > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
:not(li) > ol > li:last-child > :last-child,
|
||||
:not(li) > ul > li:last-child > :last-child {
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
ol.simple ol p,
|
||||
ol.simple ul p,
|
||||
ul.simple ol p,
|
||||
ul.simple ul p {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
ol.simple > li:not(:first-child) > p,
|
||||
ul.simple > li:not(:first-child) > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
ol.simple p,
|
||||
ul.simple p {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
aside.footnote > span,
|
||||
div.citation > span {
|
||||
float: left;
|
||||
}
|
||||
aside.footnote > span:last-of-type,
|
||||
div.citation > span:last-of-type {
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
aside.footnote > p {
|
||||
margin-left: 2em;
|
||||
}
|
||||
div.citation > p {
|
||||
margin-left: 4em;
|
||||
}
|
||||
aside.footnote > p:last-of-type,
|
||||
div.citation > p:last-of-type {
|
||||
margin-bottom: 0em;
|
||||
}
|
||||
/* Clearfix for the floated label spans of footnotes and citations.
 * `clear` only applies to block-level boxes, so the generated box must be
 * `display: block` (same pattern as the sidebar/topic/admonition clearfix
 * elsewhere in this stylesheet). */
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
    display: block;
    content: "";
    clear: both;
}
|
||||
|
||||
dl.field-list {
|
||||
display: grid;
|
||||
grid-template-columns: fit-content(30%) auto;
|
||||
}
|
||||
|
||||
/* Field names: bold, and allowed to break anywhere so that long
 * identifiers cannot overflow the fit-content() grid column. */
dl.field-list > dt {
    font-weight: bold;
    /* `word-break: break-word` is a deprecated legacy value; it is kept as a
     * fallback for older engines, with the standard equivalent after it. */
    word-break: break-word;
    overflow-wrap: anywhere;
    padding-left: 0.5em;
    padding-right: 5px;
}
|
||||
|
||||
dl.field-list > dd {
|
||||
padding-left: 0.5em;
|
||||
margin-top: 0em;
|
||||
margin-left: 0em;
|
||||
margin-bottom: 0em;
|
||||
}
|
||||
|
||||
dl {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
dd > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
dd ul, dd table {
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
dd {
|
||||
margin-top: 3px;
|
||||
margin-bottom: 10px;
|
||||
margin-left: 30px;
|
||||
}
|
||||
|
||||
.sig dd {
|
||||
margin-top: 0px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
.sig dl {
|
||||
margin-top: 0px;
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
dl > dd:last-child,
|
||||
dl > dd:last-child > :last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
dt:target, span.highlighted {
|
||||
background-color: #fbe54e;
|
||||
}
|
||||
|
||||
rect.highlighted {
|
||||
fill: #fbe54e;
|
||||
}
|
||||
|
||||
dl.glossary dt {
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
.versionmodified {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.system-message {
|
||||
background-color: #fda;
|
||||
padding: 5px;
|
||||
border: 3px solid red;
|
||||
}
|
||||
|
||||
.footnote:target {
|
||||
background-color: #ffa;
|
||||
}
|
||||
|
||||
.line-block {
|
||||
display: block;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.line-block .line-block {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
margin-left: 1.5em;
|
||||
}
|
||||
|
||||
.guilabel, .menuselection {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
.accelerator {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.classifier {
|
||||
font-style: oblique;
|
||||
}
|
||||
|
||||
.classifier:before {
|
||||
font-style: normal;
|
||||
margin: 0 0.5em;
|
||||
content: ":";
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
abbr, acronym {
|
||||
border-bottom: dotted 1px;
|
||||
cursor: help;
|
||||
}
|
||||
|
||||
/* Faint green tint for content marked with the `translated` class. */
.translated {
    background-color: rgba(207, 255, 207, 0.2);
}
|
||||
|
||||
/* Faint red tint for content marked with the `untranslated` class. */
.untranslated {
    background-color: rgba(255, 207, 207, 0.2);
}
|
||||
|
||||
/* -- code displays --------------------------------------------------------- */
|
||||
|
||||
pre {
|
||||
overflow: auto;
|
||||
overflow-y: hidden; /* fixes display issues on Chrome browsers */
|
||||
}
|
||||
|
||||
pre, div[class*="highlight-"] {
|
||||
clear: both;
|
||||
}
|
||||
|
||||
span.pre {
|
||||
-moz-hyphens: none;
|
||||
-ms-hyphens: none;
|
||||
-webkit-hyphens: none;
|
||||
hyphens: none;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
div[class*="highlight-"] {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
td.linenos pre {
|
||||
border: 0;
|
||||
background-color: transparent;
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
table.highlighttable {
|
||||
display: block;
|
||||
}
|
||||
|
||||
table.highlighttable tbody {
|
||||
display: block;
|
||||
}
|
||||
|
||||
table.highlighttable tr {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
table.highlighttable td {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
table.highlighttable td.linenos {
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
|
||||
table.highlighttable td.code {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.highlight .hll {
|
||||
display: block;
|
||||
}
|
||||
|
||||
div.highlight pre,
|
||||
table.highlighttable pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
div.code-block-caption + div {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
div.code-block-caption {
|
||||
margin-top: 1em;
|
||||
padding: 2px 5px;
|
||||
font-size: small;
|
||||
}
|
||||
|
||||
div.code-block-caption code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
/* Keep line numbers and generic prompts out of text selections, so that
 * copying a code block yields only the code itself.
 * Vendor-prefixed declarations come first and the standard property last,
 * so the standard one wins wherever it is supported. */
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp {  /* gp: Generic.Prompt */
    -webkit-user-select: none; /* Chrome/Safari */
    -moz-user-select: none; /* Firefox */
    -ms-user-select: none; /* IE10+ */
    user-select: none;
}
|
||||
|
||||
div.code-block-caption span.caption-number {
|
||||
padding: 0.1em 0.3em;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
div.code-block-caption span.caption-text {
|
||||
}
|
||||
|
||||
div.literal-block-wrapper {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
code.xref, a code {
|
||||
background-color: transparent;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.viewcode-link {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.viewcode-back {
|
||||
float: right;
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
div.viewcode-block:target {
|
||||
margin: -1px -10px;
|
||||
padding: 0 10px;
|
||||
}
|
||||
|
||||
/* -- math display ---------------------------------------------------------- */
|
||||
|
||||
img.math {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
div.body div.math p {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
span.eqno {
|
||||
float: right;
|
||||
}
|
||||
|
||||
span.eqno a.headerlink {
|
||||
position: absolute;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
div.math:hover a.headerlink {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
/* -- printout stylesheet --------------------------------------------------- */
|
||||
|
||||
@media print {
|
||||
div.document,
|
||||
div.documentwrapper,
|
||||
div.bodywrapper {
|
||||
margin: 0 !important;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
div.sphinxsidebar,
|
||||
div.related,
|
||||
div.footer,
|
||||
#top-link {
|
||||
display: none;
|
||||
}
|
||||
}
|
||||
|
Before Width: | Height: | Size: 107 B |
|
|
@ -1 +0,0 @@
|
|||
.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
|
||||
|
Before Width: | Height: | Size: 434 KiB |
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* doctools.js
|
||||
* ~~~~~~~~~~~
|
||||
*
|
||||
* Base JavaScript utilities for all Sphinx HTML documentation.
|
||||
*
|
||||
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
"use strict";
|
||||
|
||||
const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
|
||||
"TEXTAREA",
|
||||
"INPUT",
|
||||
"SELECT",
|
||||
"BUTTON",
|
||||
]);
|
||||
|
||||
/**
 * Run `callback` once the DOM is usable.
 *
 * While the document is still being parsed the callback is registered for
 * the "DOMContentLoaded" event; in every other ready state it is invoked
 * immediately (and synchronously).
 */
const _ready = (callback) => {
  const stillParsing = document.readyState === "loading";
  if (stillParsing) {
    document.addEventListener("DOMContentLoaded", callback);
    return;
  }
  callback();
};
|
||||
|
||||
/**
|
||||
* Small JavaScript module for the documentation.
|
||||
*/
|
||||
const Documentation = {
  /**
   * Entry point: set up the domain index togglers and the keyboard
   * shortcuts.
   */
  init: () => {
    Documentation.initDomainIndexTable();
    Documentation.initOnKeyListeners();
  },

  /**
   * i18n support
   */
  TRANSLATIONS: {},
  // Default plural rule: index 0 for exactly one item, index 1 otherwise.
  PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
  LOCALE: "unknown",

  // gettext and ngettext don't access `this`, so the functions can safely
  // be bound to a different name (_ = Documentation.gettext)

  /**
   * Look up the translation of `string`.
   *
   * Returns `string` itself when no translation is registered, and the
   * first (singular) entry when the translation is a (singular, plural)
   * tuple.
   */
  gettext: (string) => {
    const translated = Documentation.TRANSLATIONS[string];
    switch (typeof translated) {
      case "undefined":
        return string; // no translation
      case "string":
        return translated; // translation exists
      default:
        return translated[0]; // (singular, plural) translation tuple exists
    }
  },

  /**
   * Look up the plural-aware translation keyed by `singular` for count `n`.
   *
   * Falls back to the untranslated `singular` / `plural` argument when the
   * catalog has no usable form for `n`.
   */
  ngettext: (singular, plural, n) => {
    const translated = Documentation.TRANSLATIONS[singular];
    if (Array.isArray(translated)) {
      // A catalog plural expression such as "(n != 1)" evaluates to a
      // boolean; coerce it to a number so it is a valid array index
      // (`array[true]` would be undefined).
      const form = translated[Number(Documentation.PLURAL_EXPR(n))];
      if (typeof form !== "undefined") return form;
    } else if (typeof translated === "string" && n === 1) {
      // Singular-only translation: usable as-is for exactly one item.
      // (Indexing the string would return a single character.)
      return translated;
    }
    return n === 1 ? singular : plural;
  },

  /**
   * Merge a translation catalog into the active translations.
   *
   * Reads `catalog.messages`, `catalog.plural_expr` and `catalog.locale`.
   * NOTE(review): `plural_expr` is compiled with `new Function`, so catalogs
   * must come from a trusted source.
   */
  addTranslations: (catalog) => {
    Object.assign(Documentation.TRANSLATIONS, catalog.messages);
    Documentation.PLURAL_EXPR = new Function(
      "n",
      `return (${catalog.plural_expr})`
    );
    Documentation.LOCALE = catalog.locale;
  },

  /**
   * helper function to focus on search bar
   */
  focusSearchBar: () => {
    document.querySelectorAll("input[name=q]")[0]?.focus();
  },

  /**
   * Initialise the domain index toggle buttons
   */
  initDomainIndexTable: () => {
    // Flip one toggler image between minus.png (expanded) and plus.png
    // (collapsed), and show or hide the `tr.cg-<id>` rows it controls.
    const toggler = (el) => {
      // Drops the first 7 characters of the element id; presumably a fixed
      // "toggle-" style prefix -- verify against the generated index markup.
      const idNumber = el.id.slice(7);
      const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
      if (el.src.slice(-9) === "minus.png") {
        el.src = `${el.src.slice(0, -9)}plus.png`;
        toggledRows.forEach((row) => (row.style.display = "none"));
      } else {
        // Strips the trailing 8 characters, i.e. assumes the source ends in
        // "plus.png".
        el.src = `${el.src.slice(0, -8)}minus.png`;
        toggledRows.forEach((row) => (row.style.display = ""));
      }
    };

    const togglerElements = document.querySelectorAll("img.toggler");
    togglerElements.forEach((el) =>
      el.addEventListener("click", (event) => toggler(event.currentTarget))
    );
    // Clear any inline `display` so the toggler images become visible.
    togglerElements.forEach((el) => (el.style.display = ""));
    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
  },

  /**
   * Install the global keydown handler for previous/next page navigation
   * (arrow keys) and the "/" search shortcut, as enabled in
   * DOCUMENTATION_OPTIONS.
   */
  initOnKeyListeners: () => {
    // only install a listener if it is really needed
    if (
      !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
      !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
    )
      return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements (activeElement can be null, hence `?.`)
      if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement?.tagName))
        return;
      // bail with special keys
      if (event.altKey || event.ctrlKey || event.metaKey) return;

      if (!event.shiftKey) {
        switch (event.key) {
          case "ArrowLeft": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const prevLink = document.querySelector('link[rel="prev"]');
            if (prevLink && prevLink.href) {
              window.location.href = prevLink.href;
              event.preventDefault();
            }
            break;
          }
          case "ArrowRight": {
            if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;

            const nextLink = document.querySelector('link[rel="next"]');
            if (nextLink && nextLink.href) {
              window.location.href = nextLink.href;
              event.preventDefault();
            }
            break;
          }
        }
      }

      // some keyboard layouts may need Shift to get /
      switch (event.key) {
        case "/":
          if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
          Documentation.focusSearchBar();
          event.preventDefault();
      }
    });
  },
};
|
||||
|
||||
// quick alias for translations
|
||||
const _ = Documentation.gettext;
|
||||
|
||||
_ready(Documentation.init);
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
const DOCUMENTATION_OPTIONS = {
|
||||
VERSION: '0.1.8',
|
||||
LANGUAGE: 'en',
|
||||
COLLAPSE_INDEX: false,
|
||||
BUILDER: 'html',
|
||||
FILE_SUFFIX: '.html',
|
||||
LINK_SUFFIX: '.html',
|
||||
HAS_SOURCE: true,
|
||||
SOURCELINK_SUFFIX: '.txt',
|
||||
NAVIGATION_WITH_KEYS: false,
|
||||
SHOW_SEARCH_SUMMARY: true,
|
||||
ENABLE_SEARCH_SHORTCUTS: true,
|
||||
};
|
||||
|
After Width: | Height: | Size: 286 B |
|
|
@ -1 +0,0 @@
|
|||
!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
/**
|
||||
* @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
|
||||
*/
|
||||
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var 
b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof 
b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
/**
|
||||
* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
|
||||
*/
|
||||
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var 
a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);
|
||||
|
|
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* language_data.js
|
||||
* ~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* This script contains the language-specific data used by searchtools.js,
|
||||
* namely the list of stopwords, stemmer, scorer and splitter.
|
||||
*
|
||||
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
|
||||
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
|
||||
|
||||
|
||||
/* Non-minified version is copied as a separate JS file, if available */
|
||||
|
||||
/**
 * Porter Stemmer
 *
 * Constructor for an English suffix-stripping stemmer. Each instance exposes
 * one method, `stemWord(w)`, which applies steps 1a-5 of the algorithm in
 * order and returns the stemmed word. Words shorter than three characters are
 * returned unchanged.
 *
 * All regular expressions are compiled once per instance rather than on every
 * call; none of them uses the `g` or `y` flag, so reusing them is stateless.
 */
var Stemmer = function() {

  // Step 2: suffix -> replacement, applied when the remaining stem has m > 0.
  var step2list = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };

  // Step 3: suffix -> replacement, applied when the remaining stem has m > 0.
  var step3list = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  var c = "[^aeiou]";          // consonant
  var v = "[aeiouy]";          // vowel
  var C = c + "[^aeiouy]*";    // consonant sequence
  var V = v + "[aeiou]*";      // vowel sequence

  // Measure / shape tests on a candidate stem.
  var mgr0 = new RegExp("^(" + C + ")?" + V + C);                      // [C]VC... is m>0
  var meq1 = new RegExp("^(" + C + ")?" + V + C + "(" + V + ")?$");    // [C]VC[V] is m=1
  var mgr1 = new RegExp("^(" + C + ")?" + V + C + V + C);              // [C]VCVC... is m>1
  var s_v = new RegExp("^(" + C + ")?" + v);                           // vowel in stem
  var doubleConsonant = new RegExp("([^aeiouylsz])\\1$");              // e.g. "nn", "tt"
  var cvc = new RegExp("^" + C + v + "[^aeiouwxy]$");                  // consonant-vowel-consonant

  // Suffix matchers for the individual steps.
  var step1aSses = /^(.+?)(ss|i)es$/;
  var step1aS = /^(.+?)([^s])s$/;
  var step1bEed = /^(.+?)eed$/;
  var step1bEdIng = /^(.+?)(ed|ing)$/;
  var step1bNeedsE = /(at|bl|iz)$/;
  var step1cY = /^(.+?)y$/;
  var step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  var step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  var step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  var step4Ion = /^(.+?)(s|t)(ion)$/;
  var step5E = /^(.+?)e$/;
  var step5Ll = /ll$/;
  var lastChar = /.$/;

  /**
   * Stem a single word.
   *
   * @param {string} w - the word to stem
   * @returns {string} the stem of `w`
   */
  this.stemWord = function (w) {
    if (w.length < 3)
      return w;

    var match;
    var stem;

    // A leading "y" is upper-cased for the duration of the algorithm so the
    // case-sensitive vowel class [aeiouy] does not match it; it is restored
    // before returning.
    var firstch = w.charAt(0);
    if (firstch == "y")
      w = firstch.toUpperCase() + w.slice(1);

    // Step 1a
    if (step1aSses.test(w))
      w = w.replace(step1aSses, "$1$2");
    else if (step1aS.test(w))
      w = w.replace(step1aS, "$1$2");

    // Step 1b
    if (step1bEed.test(w)) {
      match = step1bEed.exec(w);
      if (mgr0.test(match[1]))
        w = w.replace(lastChar, "");
    }
    else if (step1bEdIng.test(w)) {
      match = step1bEdIng.exec(w);
      stem = match[1];
      if (s_v.test(stem)) {
        w = stem;
        if (step1bNeedsE.test(w))
          w = w + "e";
        else if (doubleConsonant.test(w))
          w = w.replace(lastChar, "");
        else if (cvc.test(w))
          w = w + "e";
      }
    }

    // Step 1c
    match = step1cY.exec(w);
    if (match) {
      stem = match[1];
      if (s_v.test(stem))
        w = stem + "i";
    }

    // Step 2
    match = step2Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem))
        w = stem + step2list[match[2]];
    }

    // Step 3
    match = step3Suffix.exec(w);
    if (match) {
      stem = match[1];
      if (mgr0.test(stem))
        w = stem + step3list[match[2]];
    }

    // Step 4 (the "ion" rule is only tried when no plain suffix matched)
    if (step4Suffix.test(w)) {
      match = step4Suffix.exec(w);
      stem = match[1];
      if (mgr1.test(stem))
        w = stem;
    }
    else if (step4Ion.test(w)) {
      match = step4Ion.exec(w);
      stem = match[1] + match[2];
      if (mgr1.test(stem))
        w = stem;
    }

    // Step 5
    match = step5E.exec(w);
    if (match) {
      stem = match[1];
      if (mgr1.test(stem) || (meq1.test(stem) && !(cvc.test(stem))))
        w = stem;
    }
    if (step5Ll.test(w) && mgr1.test(w))
      w = w.replace(lastChar, "");

    // and turn initial Y back to y
    if (firstch == "y")
      w = firstch.toLowerCase() + w.slice(1);
    return w;
  };
};
|
||||
|
||||
|
After Width: | Height: | Size: 90 B |
|
Before Width: | Height: | Size: 120 B |
|
After Width: | Height: | Size: 90 B |
|
|
@ -0,0 +1,74 @@
|
|||
/* Pygments syntax-highlighting theme: line-number gutter and .highlight token colours. */
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc; }
.highlight { background: #f8f8f8; }
.highlight .c { color: #3D7B7B; font-style: italic; } /* Comment */
.highlight .err { border: 1px solid #FF0000; } /* Error */
.highlight .k { color: #008000; font-weight: bold; } /* Keyword */
.highlight .o { color: #666666; } /* Operator */
.highlight .ch { color: #3D7B7B; font-style: italic; } /* Comment.Hashbang */
.highlight .cm { color: #3D7B7B; font-style: italic; } /* Comment.Multiline */
.highlight .cp { color: #9C6500; } /* Comment.Preproc */
.highlight .cpf { color: #3D7B7B; font-style: italic; } /* Comment.PreprocFile */
.highlight .c1 { color: #3D7B7B; font-style: italic; } /* Comment.Single */
.highlight .cs { color: #3D7B7B; font-style: italic; } /* Comment.Special */
.highlight .gd { color: #A00000; } /* Generic.Deleted */
.highlight .ge { font-style: italic; } /* Generic.Emph */
.highlight .gr { color: #E40000; } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold; } /* Generic.Heading */
.highlight .gi { color: #008400; } /* Generic.Inserted */
.highlight .go { color: #717171; } /* Generic.Output */
.highlight .gp { color: #000080; font-weight: bold; } /* Generic.Prompt */
.highlight .gs { font-weight: bold; } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold; } /* Generic.Subheading */
.highlight .gt { color: #0044DD; } /* Generic.Traceback */
.highlight .kc { color: #008000; font-weight: bold; } /* Keyword.Constant */
.highlight .kd { color: #008000; font-weight: bold; } /* Keyword.Declaration */
.highlight .kn { color: #008000; font-weight: bold; } /* Keyword.Namespace */
.highlight .kp { color: #008000; } /* Keyword.Pseudo */
.highlight .kr { color: #008000; font-weight: bold; } /* Keyword.Reserved */
.highlight .kt { color: #B00040; } /* Keyword.Type */
.highlight .m { color: #666666; } /* Literal.Number */
.highlight .s { color: #BA2121; } /* Literal.String */
.highlight .na { color: #687822; } /* Name.Attribute */
.highlight .nb { color: #008000; } /* Name.Builtin */
.highlight .nc { color: #0000FF; font-weight: bold; } /* Name.Class */
.highlight .no { color: #880000; } /* Name.Constant */
.highlight .nd { color: #AA22FF; } /* Name.Decorator */
.highlight .ni { color: #717171; font-weight: bold; } /* Name.Entity */
.highlight .ne { color: #CB3F38; font-weight: bold; } /* Name.Exception */
.highlight .nf { color: #0000FF; } /* Name.Function */
.highlight .nl { color: #767600; } /* Name.Label */
.highlight .nn { color: #0000FF; font-weight: bold; } /* Name.Namespace */
.highlight .nt { color: #008000; font-weight: bold; } /* Name.Tag */
.highlight .nv { color: #19177C; } /* Name.Variable */
.highlight .ow { color: #AA22FF; font-weight: bold; } /* Operator.Word */
.highlight .w { color: #bbbbbb; } /* Text.Whitespace */
.highlight .mb { color: #666666; } /* Literal.Number.Bin */
.highlight .mf { color: #666666; } /* Literal.Number.Float */
.highlight .mh { color: #666666; } /* Literal.Number.Hex */
.highlight .mi { color: #666666; } /* Literal.Number.Integer */
.highlight .mo { color: #666666; } /* Literal.Number.Oct */
.highlight .sa { color: #BA2121; } /* Literal.String.Affix */
.highlight .sb { color: #BA2121; } /* Literal.String.Backtick */
.highlight .sc { color: #BA2121; } /* Literal.String.Char */
.highlight .dl { color: #BA2121; } /* Literal.String.Delimiter */
.highlight .sd { color: #BA2121; font-style: italic; } /* Literal.String.Doc */
.highlight .s2 { color: #BA2121; } /* Literal.String.Double */
.highlight .se { color: #AA5D1F; font-weight: bold; } /* Literal.String.Escape */
.highlight .sh { color: #BA2121; } /* Literal.String.Heredoc */
.highlight .si { color: #A45A77; font-weight: bold; } /* Literal.String.Interpol */
.highlight .sx { color: #008000; } /* Literal.String.Other */
.highlight .sr { color: #A45A77; } /* Literal.String.Regex */
.highlight .s1 { color: #BA2121; } /* Literal.String.Single */
.highlight .ss { color: #19177C; } /* Literal.String.Symbol */
.highlight .bp { color: #008000; } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0000FF; } /* Name.Function.Magic */
.highlight .vc { color: #19177C; } /* Name.Variable.Class */
.highlight .vg { color: #19177C; } /* Name.Variable.Global */
.highlight .vi { color: #19177C; } /* Name.Variable.Instance */
.highlight .vm { color: #19177C; } /* Name.Variable.Magic */
.highlight .il { color: #666666; } /* Literal.Number.Integer.Long */
|
||||
|
|
@ -0,0 +1,574 @@
|
|||
/*
|
||||
* searchtools.js
|
||||
* ~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* Sphinx JavaScript utilities for the full-text search.
|
||||
*
|
||||
* :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
"use strict";
|
||||
|
||||
/**
 * Default weights used to rank search results.
 *
 * Only defined when no `Scorer` exists yet, so a page can supply its own
 * before this script runs.
 */
if (typeof Scorer === "undefined") {
  var Scorer = {
    // Optional hook: define `score` to post-process the score of every result.
    // It receives a result array [docname, title, anchor, descr, score, filename]
    // and returns the new score.
    /*
    score: result => {
      const [docname, title, anchor, descr, score, filename] = result
      return score
    },
    */

    // Object search: the query equals the full object name ...
    objNameMatch: 11,
    // ... or only matches inside the last dotted part of the name.
    objPartialMatch: 6,
    // Bonus added according to the object's priority.
    objPrio: {
      0: 15, // used to be importantResults
      1: 5, // used to be objectResults
      2: -5, // used to be unimportantResults
    },
    // Bonus for priorities missing from the mapping above.
    objPrioDefault: 0,

    // Full-text search: the query term occurs in a title ...
    title: 15,
    partialTitle: 7,
    // ... or in the body terms.
    term: 5,
    partialTerm: 2,
  };
}
|
||||
|
||||
/**
 * Detach every child of `element`, last child first.
 * A missing (null/undefined) element is ignored.
 */
const _removeChildren = (element) => {
  if (!element) return;
  for (let child = element.lastChild; child; child = element.lastChild) {
    element.removeChild(child);
  }
};
|
||||
|
||||
/**
 * Backslash-escape every character of `string` that is special inside a
 * regular expression, so the result matches the input literally.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
 */
const _escapeRegExp = (string) => {
  const specialChars = /[.*+\-?^${}()|[\]\\]/g;
  // "$&" in the replacement stands for the whole matched character
  return string.replace(specialChars, "\\$&");
};
|
||||
|
||||
/**
 * Render one search result as an <li> and append it to Search.output.
 *
 * @param item            result array [docname, title, anchor, descr, score, filename]
 * @param searchTerms     stemmed terms, used to pick the summary excerpt
 * @param highlightTerms  raw lower-cased terms to highlight inside the entry
 *
 * When the result has no description and summaries are enabled, the target
 * page is fetched and an excerpt is appended asynchronously.
 */
const _displayItem = (item, searchTerms, highlightTerms) => {
  const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
  const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
  const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
  const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;
  const contentRoot = document.documentElement.dataset.content_root;

  const [docName, title, anchor, descr, score, _filename] = item;

  let listItem = document.createElement("li");
  let requestUrl;
  let linkUrl;
  if (docBuilder === "dirhtml") {
    // dirhtml builder
    let dirname = docName + "/";
    if (dirname.match(/\/index\/$/))
      dirname = dirname.substring(0, dirname.length - 6);
    else if (dirname === "index/") dirname = "";
    requestUrl = contentRoot + dirname;
    linkUrl = requestUrl;
  } else {
    // normal html builders
    requestUrl = contentRoot + docName + docFileSuffix;
    linkUrl = docName + docLinkSuffix;
  }
  let linkEl = listItem.appendChild(document.createElement("a"));
  linkEl.href = linkUrl + anchor;
  linkEl.dataset.score = score;
  linkEl.innerHTML = title;
  if (descr) {
    listItem.appendChild(document.createElement("span")).innerHTML =
      " (" + descr + ")";
    // highlight search terms in the description
    if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
      highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
  }
  else if (showSearchSummary)
    fetch(requestUrl)
      .then((responseData) => responseData.text())
      .then((data) => {
        if (data) {
          // makeSearchSummary returns null when the page has no extractable
          // text; appendChild(null) would throw and skip the highlighting.
          const summary = Search.makeSearchSummary(data, searchTerms);
          if (summary) listItem.appendChild(summary);
        }
        // highlight search terms in the summary
        if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
          highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
      });
  Search.output.appendChild(listItem);
};
|
||||
/**
 * Stop the progress animation and show the final title and status line.
 *
 * @param resultCount  number of results that were displayed
 *
 * The status message is looked up in the translation catalog with its
 * literal "${resultCount}" placeholder and the number is substituted
 * afterwards. Interpolating first (as a template literal) would change the
 * lookup key for every count, so no translation could ever match.
 */
const _finishSearch = (resultCount) => {
  Search.stopPulse();
  Search.title.innerText = _("Search Results");
  if (!resultCount)
    Search.status.innerText = Documentation.gettext(
      "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
    );
  else
    Search.status.innerText = _(
      "Search finished, found ${resultCount} page(s) matching the search query."
    ).replace("${resultCount}", () => String(resultCount));
};
|
||||
/**
 * Display the results one at a time, yielding to the browser between items.
 *
 * Takes the last entry off `results`, renders it, and schedules itself again
 * after 5 ms. Once `results` is empty the search is finalised with
 * `resultCount` (the original total, which is deliberately not recomputed).
 */
const _displayNextItem = (
  results,
  resultCount,
  searchTerms,
  highlightTerms,
) => {
  // nothing left: update title and status message
  if (!results.length) {
    _finishSearch(resultCount);
    return;
  }

  // results left: show the next one, then come back for the rest
  const nextItem = results.pop();
  _displayItem(nextItem, searchTerms, highlightTerms);
  const showRemaining = () =>
    _displayNextItem(results, resultCount, searchTerms, highlightTerms);
  setTimeout(showRemaining, 5);
};
|
||||
|
||||
/**
 * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
 * custom function per language.
 *
 * The query is cut at every run of characters that are not Unicode letters,
 * numbers, underscores, or emoji characters, and empty pieces are dropped.
 * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
 */
if (typeof splitQuery === "undefined") {
  var splitQuery = (query) => {
    const separators = /[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
    const pieces = query.split(separators);
    // a leading or trailing separator leaves an empty string behind
    return pieces.filter((piece) => piece.length > 0);
  };
}
|
||||
|
||||
/**
 * Search Module
 *
 * Client-side full-text search over the index object handed to
 * `Search.setIndex`. `Search.init` reads the `q` URL parameter and starts a
 * search; results are rendered into the element with id "search-results".
 */
const Search = {
  _index: null,
  _queued_query: null,
  _pulse_status: -1,

  /**
   * Extract the plain text of the `[role="main"]` element of an HTML page,
   * ignoring ".headerlink" anchors. Returns "" (and warns) when the page has
   * no such element.
   */
  htmlToText: (htmlString) => {
    const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
    htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() });
    const docContent = htmlElement.querySelector('[role="main"]');
    if (docContent) return docContent.textContent;
    console.warn(
      "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template."
    );
    return "";
  },

  /** Copy the `q` URL parameter into the search inputs and run the search. */
  init: () => {
    const query = new URLSearchParams(window.location.search).get("q");
    document
      .querySelectorAll('input[name="q"]')
      .forEach((el) => (el.value = query));
    if (query) Search.performSearch(query);
  },

  /** Load the index by appending a <script> element pointing at `url`. */
  loadIndex: (url) =>
    (document.body.appendChild(document.createElement("script")).src = url),

  /** Store the index and run the query that was deferred while it loaded. */
  setIndex: (index) => {
    Search._index = index;
    if (Search._queued_query !== null) {
      const query = Search._queued_query;
      Search._queued_query = null;
      Search.query(query);
    }
  },

  hasIndex: () => Search._index !== null,

  deferQuery: (query) => (Search._queued_query = query),

  stopPulse: () => (Search._pulse_status = -1),

  /**
   * Animate the "..." after the title, advancing every 500 ms until
   * stopPulse() is called. Does nothing when the animation already runs.
   */
  startPulse: () => {
    if (Search._pulse_status >= 0) return;

    const pulse = () => {
      Search._pulse_status = (Search._pulse_status + 1) % 4;
      Search.dots.innerText = ".".repeat(Search._pulse_status);
      // Check the status when the timer fires, i.e. after stopPulse() had a
      // chance to reset it to -1. Checking right after the modulo above is
      // always true, which kept the timer alive forever.
      window.setTimeout(() => {
        if (Search._pulse_status >= 0) pulse();
      }, 500);
    };
    pulse();
  },

  /**
   * perform a search for something (or wait until index is loaded)
   */
  performSearch: (query) => {
    // create the required interface elements
    const searchText = document.createElement("h2");
    searchText.textContent = _("Searching");
    const searchSummary = document.createElement("p");
    searchSummary.classList.add("search-summary");
    searchSummary.innerText = "";
    const searchList = document.createElement("ul");
    searchList.classList.add("search");

    const out = document.getElementById("search-results");
    Search.title = out.appendChild(searchText);
    Search.dots = Search.title.appendChild(document.createElement("span"));
    Search.status = out.appendChild(searchSummary);
    Search.output = out.appendChild(searchList);

    const searchProgress = document.getElementById("search-progress");
    // Some themes don't use the search progress node
    if (searchProgress) {
      searchProgress.innerText = _("Preparing search...");
    }
    Search.startPulse();

    // index already loaded, the browser was quick!
    if (Search.hasIndex()) Search.query(query);
    else Search.deferQuery(query);
  },

  /**
   * execute search (requires search index to be loaded)
   */
  query: (query) => {
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const titles = Search._index.titles;
    const allTitles = Search._index.alltitles;
    const indexEntries = Search._index.indexentries;

    // stem the search terms and add them to the correct list
    const stemmer = new Stemmer();
    const searchTerms = new Set();
    const excludedTerms = new Set();
    const highlightTerms = new Set();
    const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
    splitQuery(query.trim()).forEach((queryTerm) => {
      const queryTermLower = queryTerm.toLowerCase();

      // maybe skip this "word"
      // stopwords array is from language_data.js
      if (
        stopwords.indexOf(queryTermLower) !== -1 ||
        queryTerm.match(/^\d+$/)
      )
        return;

      // stem the word
      let word = stemmer.stemWord(queryTermLower);
      // select the correct list
      if (word[0] === "-") excludedTerms.add(word.substr(1));
      else {
        searchTerms.add(word);
        highlightTerms.add(queryTermLower);
      }
    });

    if (SPHINX_HIGHLIGHT_ENABLED) {  // set in sphinx_highlight.js
      localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" "))
    }

    // console.debug("SEARCH: searching for:");
    // console.info("required: ", [...searchTerms]);
    // console.info("excluded: ", [...excludedTerms]);

    // array of [docname, title, anchor, descr, score, filename]
    let results = [];
    _removeChildren(document.getElementById("search-progress"));

    // match the whole query against section titles; the query must cover at
    // least half of the title, and the score is the covered percentage
    const queryLower = query.toLowerCase().trim();
    for (const [title, foundTitles] of Object.entries(allTitles)) {
      if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) {
        for (const [file, id] of foundTitles) {
          let score = Math.round(100 * queryLower.length / title.length)
          results.push([
            docNames[file],
            titles[file] !== title ? `${titles[file]} > ${title}` : title,
            id !== null ? "#" + id : "",
            null,
            score,
            filenames[file],
          ]);
        }
      }
    }

    // search for explicit entries in index directives
    for (const [entry, foundEntries] of Object.entries(indexEntries)) {
      if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
        for (const [file, id] of foundEntries) {
          let score = Math.round(100 * queryLower.length / entry.length)
          results.push([
            docNames[file],
            titles[file],
            id ? "#" + id : "",
            null,
            score,
            filenames[file],
          ]);
        }
      }
    }

    // lookup as object
    objectTerms.forEach((term) =>
      results.push(...Search.performObjectSearch(term, objectTerms))
    );

    // lookup as search terms in fulltext
    results.push(...Search.performTermsSearch(searchTerms, excludedTerms));

    // let the scorer override scores with a custom scoring function
    if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item)));

    // now sort the results by score (in opposite order of appearance, since the
    // display function below uses pop() to retrieve items) and then
    // alphabetically
    results.sort((a, b) => {
      const leftScore = a[4];
      const rightScore = b[4];
      if (leftScore === rightScore) {
        // same score: sort alphabetically
        const leftTitle = a[1].toLowerCase();
        const rightTitle = b[1].toLowerCase();
        if (leftTitle === rightTitle) return 0;
        return leftTitle > rightTitle ? -1 : 1; // inverted is intentional
      }
      return leftScore > rightScore ? 1 : -1;
    });

    // remove duplicate search results
    // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
    let seen = new Set();
    results = results.reverse().reduce((acc, result) => {
      let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
      if (!seen.has(resultStr)) {
        acc.push(result);
        seen.add(resultStr);
      }
      return acc;
    }, []);

    results = results.reverse();

    // for debugging
    //Search.lastresults = results.slice();  // a copy
    // console.info("search results:", Search.lastresults);

    // print the results
    _displayNextItem(results, results.length, searchTerms, highlightTerms);
  },

  /**
   * search for object names
   *
   * `object` is one lower-cased query term, `objectTerms` the set of all of
   * them. Returns result arrays [docname, fullname, anchor, descr, score,
   * filename] for every indexed object whose full name contains `object`
   * and whose name/type/title text contains all the other terms.
   */
  performObjectSearch: (object, objectTerms) => {
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const objects = Search._index.objects;
    const objNames = Search._index.objnames;
    const titles = Search._index.titles;

    const results = [];

    const objectSearchCallback = (prefix, match) => {
      const name = match[4];
      const fullname = (prefix ? prefix + "." : "") + name;
      const fullnameLower = fullname.toLowerCase();
      if (fullnameLower.indexOf(object) < 0) return;

      let score = 0;
      const parts = fullnameLower.split(".");

      // check for different match types: exact matches of full name or
      // "last name" (i.e. last dotted part)
      if (fullnameLower === object || parts.slice(-1)[0] === object)
        score += Scorer.objNameMatch;
      else if (parts.slice(-1)[0].indexOf(object) > -1)
        score += Scorer.objPartialMatch; // matches in last name

      const objName = objNames[match[1]][2];
      const title = titles[match[0]];

      // If more than one term searched for, we require other words to be
      // found in the name/title/description
      const otherTerms = new Set(objectTerms);
      otherTerms.delete(object);
      if (otherTerms.size > 0) {
        const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
        if (
          [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
        )
          return;
      }

      let anchor = match[3];
      if (anchor === "") anchor = fullname;
      else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;

      const descr = objName + _(", in ") + title;

      // add custom score for some objects according to scorer
      if (Scorer.objPrio.hasOwnProperty(match[2]))
        score += Scorer.objPrio[match[2]];
      else score += Scorer.objPrioDefault;

      results.push([
        docNames[match[0]],
        fullname,
        "#" + anchor,
        descr,
        score,
        filenames[match[0]],
      ]);
    };
    Object.keys(objects).forEach((prefix) =>
      objects[prefix].forEach((array) =>
        objectSearchCallback(prefix, array)
      )
    );
    return results;
  },

  /**
   * search for full-text terms in the index
   *
   * Returns one result array [docname, title, "", null, score, filename] per
   * file that matches every required term (terms shorter than three
   * characters may be missing) and none of the excluded terms.
   */
  performTermsSearch: (searchTerms, excludedTerms) => {
    // prepare search
    const terms = Search._index.terms;
    const titleTerms = Search._index.titleterms;
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const titles = Search._index.titles;

    // Own-property lookup: a query word such as "constructor" must not
    // resolve to something inherited from Object.prototype.
    const lookup = (table, key) =>
      Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    // An index entry is either a single file number or an array of them.
    const asFileList = (entry) => {
      if (entry === undefined) return [];
      return entry.length === undefined ? [entry] : entry;
    };

    const scoreMap = new Map();
    const fileMap = new Map();

    // perform the search on the required terms
    searchTerms.forEach((word) => {
      const files = [];
      const exactTerm = lookup(terms, word);
      const exactTitle = lookup(titleTerms, word);
      const arr = [
        { files: exactTerm, score: Scorer.term },
        { files: exactTitle, score: Scorer.title },
      ];
      // add support for partial matches, only when there is no exact match
      // (compare with undefined: file number 0 is a valid exact match)
      if (word.length > 2) {
        const escapedWord = _escapeRegExp(word);
        Object.keys(terms).forEach((term) => {
          if (term.match(escapedWord) && exactTerm === undefined)
            arr.push({ files: terms[term], score: Scorer.partialTerm });
        });
        Object.keys(titleTerms).forEach((term) => {
          if (term.match(escapedWord) && exactTitle === undefined)
            arr.push({ files: titleTerms[term], score: Scorer.partialTitle });
        });
      }

      // no match but word was a required one
      if (arr.every((record) => record.files === undefined)) return;

      // found search word in contents
      arr.forEach((record) => {
        if (record.files === undefined) return;

        const recordFiles = asFileList(record.files);
        files.push(...recordFiles);

        // set score for the word in each file (a later record overrides an
        // earlier one for the same file and word)
        recordFiles.forEach((file) => {
          if (!scoreMap.has(file)) scoreMap.set(file, new Map());
          scoreMap.get(file).set(word, record.score);
        });
      });

      // create the mapping file -> matched words; a file listed twice for
      // the same word must keep the words recorded for it earlier
      files.forEach((file) => {
        if (!fileMap.has(file)) fileMap.set(file, [word]);
        else if (fileMap.get(file).indexOf(word) === -1)
          fileMap.get(file).push(word);
      });
    });

    // as search terms with length < 3 are discarded
    const filteredTermCount = [...searchTerms].filter(
      (term) => term.length > 2
    ).length;

    // now check if the files don't contain excluded terms
    const results = [];
    for (const [file, wordList] of fileMap) {
      // check if all requirements are matched
      if (
        wordList.length !== searchTerms.size &&
        wordList.length !== filteredTermCount
      )
        continue;

      // ensure that none of the excluded terms is in the search result;
      // skip only this file, the remaining files are still candidates
      if (
        [...excludedTerms].some(
          (term) =>
            asFileList(lookup(terms, term)).includes(file) ||
            asFileList(lookup(titleTerms, term)).includes(file)
        )
      )
        continue;

      // select one (max) score for the file.
      const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w)));
      // add result to the result list
      results.push([
        docNames[file],
        titles[file],
        "",
        null,
        score,
        filenames[file],
      ]);
    }
    return results;
  },

  /**
   * helper function to return a node containing the
   * search summary for a given text. keywords is a list
   * of stemmed words.
   *
   * Returns null when no text can be extracted from `htmlText`. The excerpt
   * is 240 characters long and starts 120 characters before the last keyword
   * that occurs in the text, or at the beginning when none occurs.
   */
  makeSearchSummary: (htmlText, keywords) => {
    const text = Search.htmlToText(htmlText);
    if (text === "") return null;

    const textLower = text.toLowerCase();
    const matchPositions = [...keywords]
      .map((k) => textLower.indexOf(k.toLowerCase()))
      .filter((i) => i > -1);
    // Without this default the offset below becomes NaN when no keyword is
    // found, which prefixes the excerpt with "..." and never adds the tail.
    const actualStartPosition = matchPositions.length
      ? matchPositions[matchPositions.length - 1]
      : 0;
    const startWithContext = Math.max(actualStartPosition - 120, 0);

    const top = startWithContext === 0 ? "" : "...";
    const tail = startWithContext + 240 < text.length ? "..." : "";

    let summary = document.createElement("p");
    summary.classList.add("context");
    summary.textContent =
      top + text.slice(startWithContext, startWithContext + 240).trim() + tail;

    return summary;
  },
};
|
||||
|
||||
_ready(Search.init);
|
||||
|
|
@ -1,144 +0,0 @@
|
|||
/* Highlighting utilities for Sphinx HTML documentation. */
|
||||
"use strict";
|
||||
|
||||
const SPHINX_HIGHLIGHT_ENABLED = true
|
||||
|
||||
/**
 * Highlight the first occurrence of `text` in every text node below `node`
 * by wrapping it in a <span> carrying `className`.
 *
 * Inside an <svg> a <tspan> is used instead, and a <rect> covering the
 * parent's bounding box is queued in `addItems` (as {parent, target}) for the
 * caller to insert. Text whose parent already has `className` or
 * "nohighlight" is left alone, as is the content of button, select and
 * textarea elements.
 */
const _highlight = (node, addItems, text, className) => {
  const SVG_NS = "http://www.w3.org/2000/svg";

  // Element (or other non-text) node: descend unless it is a form control.
  if (node.nodeType !== Node.TEXT_NODE) {
    if (node.matches && !node.matches("button, select, textarea")) {
      node.childNodes.forEach((child) => _highlight(child, addItems, text, className));
    }
    return;
  }

  const content = node.nodeValue;
  const container = node.parentNode;
  const matchStart = content.toLowerCase().indexOf(text);
  if (matchStart < 0) return;
  if (container.classList.contains(className)) return;
  if (container.classList.contains("nohighlight")) return;

  const matchEnd = matchStart + text.length;
  const closestNode = container.closest("body, svg, foreignObject");
  const insideSvg = closestNode && closestNode.matches("svg");

  let wrapper;
  if (insideSvg) {
    wrapper = document.createElementNS(SVG_NS, "tspan");
  } else {
    wrapper = document.createElement("span");
    wrapper.classList.add(className);
  }
  wrapper.appendChild(document.createTextNode(content.slice(matchStart, matchEnd)));

  // Split the text node into: text before | wrapper | text after.
  const after = container.insertBefore(
    document.createTextNode(content.slice(matchEnd)),
    node.nextSibling
  );
  container.insertBefore(wrapper, after);
  node.nodeValue = content.slice(0, matchStart);

  if (insideSvg) {
    const rect = document.createElementNS(SVG_NS, "rect");
    const bbox = container.getBBox();
    rect.x.baseVal.value = bbox.x;
    rect.y.baseVal.value = bbox.y;
    rect.width.baseVal.value = bbox.width;
    rect.height.baseVal.value = bbox.height;
    rect.setAttribute("class", className);
    addItems.push({ parent: container, target: rect });
  }
};
|
||||
/**
 * Highlight `text` below `thisNode` using `_highlight`, then insert every
 * element queued during the traversal directly before the element it was
 * queued for ("beforebegin").
 */
const _highlightText = (thisNode, text, className) => {
  const queued = [];
  _highlight(thisNode, queued, text, className);
  for (const { parent, target } of queued) {
    parent.insertAdjacentElement("beforebegin", target);
  }
};
|
||||
|
||||
/**
 * Small JavaScript module for the documentation.
 *
 * Highlights search terms on the current page, offers a link to hide the
 * highlighting again, and optionally binds the Escape key to that action.
 */
const SphinxHighlight = {

  /**
   * highlight the search words provided in localstorage (key
   * "sphinx_highlight_terms") or, failing that, in the "highlight" URL
   * query parameter. Both sources are cleared once read.
   */
  highlightSearchWords: () => {
    if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight

    // get and clear terms from localstorage / the URL
    const url = new URL(window.location);
    const highlight =
      localStorage.getItem("sphinx_highlight_terms")
      || url.searchParams.get("highlight")
      || "";
    localStorage.removeItem("sphinx_highlight_terms");
    url.searchParams.delete("highlight");
    // rewrite the address bar so the highlight parameter does not persist
    window.history.replaceState({}, "", url);

    // get individual terms from highlight string
    const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
    if (terms.length === 0) return; // nothing to do

    // There should never be more than one element matching "div.body"
    const divBody = document.querySelectorAll("div.body");
    const body = divBody.length ? divBody[0] : document.querySelector("body");
    // the highlighting itself is deferred by 10 ms
    window.setTimeout(() => {
      terms.forEach((term) => _highlightText(body, term, "highlighted"));
    }, 10);

    // offer a link to remove the highlighting, if there is a search box
    const searchBox = document.getElementById("searchbox");
    if (searchBox === null) return;
    searchBox.appendChild(
      document
        .createRange()
        .createContextualFragment(
          '<p class="highlight-link">' +
            '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
            _("Hide Search Matches") +
            "</a></p>"
        )
    );
  },

  /**
   * helper function to hide the search marks again
   */
  hideSearchWords: () => {
    document
      .querySelectorAll("#searchbox .highlight-link")
      .forEach((el) => el.remove());
    document
      .querySelectorAll("span.highlighted")
      .forEach((el) => el.classList.remove("highlighted"));
    // Matches inside SVG are marked by a separate <rect class="highlighted">
    // created in _highlight, not by a classed span. Those have to be removed
    // as elements: stripping only the class would leave an unstyled
    // rectangle behind.
    document
      .querySelectorAll("rect.highlighted")
      .forEach((el) => el.remove());
    localStorage.removeItem("sphinx_highlight_terms");
  },

  /**
   * install a keydown listener that hides the search marks on Escape,
   * provided DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS is set.
   */
  initEscapeListener: () => {
    // only install a listener if it is really needed
    if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements; activeElement can be null when nothing
      // is focused, so guard before reading tagName
      const active = document.activeElement;
      if (active && BLACKLISTED_KEY_CONTROL_ELEMENTS.has(active.tagName)) return;
      // bail with special keys
      if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
      if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
        SphinxHighlight.hideSearchWords();
        event.preventDefault();
      }
    });
  },
};
|
||||
|
||||
// Register the search-term highlighter as a start-up callback. `_ready` is
// not defined in this file; presumably it runs the callback once the DOM is
// ready — confirm against its definition.
_ready(SphinxHighlight.highlightSearchWords);
|
||||
// Register the Escape-key listener set-up as a start-up callback (it is a
// no-op unless DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS is set).
_ready(SphinxHighlight.initEscapeListener);
|
||||