cleaning examples and adding basic example
This commit is contained in:
parent acfb02c51f
commit 9ad36ef008

@@ -1,5 +1,11 @@
Change Log 0.1.9
----------------
- [TODO] add LeQua2024
- [TODO] add njobs to env
- [TODO] add basic examples
- [TODO] add default classifier to env

- Moved the wiki documents to the ./docs/ folder so that they become editable via PR for the community

- Added Composable methods from Mirko Bunse's qunfold library! (thanks to Mirko Bunse!)
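
The changelog entry above announces composable methods adapted from the qunfold library. The sketch below illustrates how such a composable quantifier might be assembled. It is hypothetical: the module path `quapy.method.composable` and the names `ComposableQuantifier`, `LeastSquaresLoss`, and `ClassTransformer` are assumptions modelled on qunfold's loss-plus-representation pattern and may differ from the actual API.

```
from sklearn.ensemble import RandomForestClassifier
import quapy as qp

# assumed module path and class names; check the quapy/qunfold documentation for the actual composable API
from quapy.method.composable import ComposableQuantifier, LeastSquaresLoss, ClassTransformer

data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
train, test = data.train_test

# compose a quantifier from a loss and a classifier-based representation (ACC-like behaviour);
# qunfold's ClassTransformer typically relies on a classifier with out-of-bag predictions
quantifier = ComposableQuantifier(
    LeastSquaresLoss(),
    ClassTransformer(RandomForestClassifier(oob_score=True)),
)
quantifier.fit(train)
print(quantifier.quantify(test.X))
```
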
@@ -0,0 +1,86 @@
"""
|
||||
This is a basic example showcasing some of the important concepts behind quapy.
|
||||
First of all, import quapy. Wou would typically import quapy in the following way
|
||||
"""
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
|
||||
# let's fetch some dataset to run one experiment
|
||||
# datasets are available in the "qp.data.datasets" module (there is a shortcut in qp.datasets)
|
||||
|
||||
data = qp.datasets.fetch_reviews('hp')
|
||||
|
||||
# The data are in plain text format. You can convert them into tfidf using some utilities available in the
|
||||
# qp.data.preprocessing module, e.g.:
|
||||
|
||||
data = qp.data.preprocessing.text2tfidf(data, min_df=5)
|
||||
|
||||
# you can obtain the same result by specifying tfidf=True it in the fetch function:
|
||||
# data = qp.datasets.fetch_reviews('hp', tfidf=True, min_df=5)
|
||||
|
||||
# data is an object of type Dataset, a very basic collection that contains a "training" and a "test" collection inside.
|
||||
train, test = data.train_test
|
||||
|
||||
# train and test are instances of LabelledCollection, a class that contains covariates (X) and true labels (y), along
|
||||
# with sampling functionality. Here are some examples of usage:
|
||||
X, y = train.Xy
|
||||
print(f'number of classes {train.n_classes}')
|
||||
print(f'class names {train.classes_}')
|
||||
|
||||
import quapy.functional as F # <- this module has some functional utilities, like a string formatter for prevalences
|
||||
print(f'training prevalence = {F.strprev(train.prevalence())}')
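
# LabelledCollection also offers sampling functionality, e.g., drawing a random sample of a given size at a
# desired prevalence. A hedged sketch follows (commented out): the exact signature of "sampling" (the sample
# size followed by the per-class prevalence values) is an assumption and may differ in your quapy version.
# sample = train.sampling(100, 0.25, 0.75)
# print(f'sample prevalence = {F.strprev(sample.prevalence())}')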

# let us train one quantifier, for example, PACC using sklearn's LogisticRegression as the underlying classifier
classifier = LogisticRegression()

pacc = qp.method.aggregative.PACC(classifier)

print(f'training {pacc}')
pacc.fit(train)

# let's now test our quantifier on the test data (of course, we should not use the test labels y at this point, only X)
X_test = test.X
estim_prevalence = pacc.quantify(X_test)

print(f'estimated test prevalence = {F.strprev(estim_prevalence)}')
print(f'true test prevalence = {F.strprev(test.prevalence())}')

# let us use some evaluation metric to check how well our quantifier fared.
# Error metrics are available in the qp.error module.

mae_error = qp.error.mae(test.prevalence(), estim_prevalence)
print(f'MAE={mae_error:.4f}')

# In quantification, we typically use an evaluation protocol to test the performance of a quantification method.
# The reason is that, even though the test set contains many instances, the whole set counts as a single datapoint
# to the quantifier, because quantification targets samples of instances as a whole (while classification, or
# regression, targets instances individually).
# Quapy provides some standard protocols in qp.protocol. We will use the artificial prevalence protocol (APP). APP
# works by generating many test samples, out of our original test collection, characterized by different prevalence
# values. To do so, a grid of prevalence values is explored, and different samples are generated conditioned on each
# prevalence vector. This way, the quantifier is stress-tested on a wide range of prevalence values, i.e., under
# prior probability shift conditions.

# In this case we use "test" and not only "test.X" since the protocol needs to know the class labels in order
# to generate samples at different prevalences. We will generate samples of 100 instances, from a grid of 21 values,
# i.e., from the grid [0.0, 0.05, 0.10, ..., 1.00], and only one sample (repeats=1) for each combination.
app = qp.protocol.APP(test, sample_size=100, n_prevalences=21, repeats=1)

# let's print some examples:
show = 5
for i, (sample, prev) in enumerate(app()):
    print(f'sample-{i}: {F.strprev(prev)}')
    if i + 1 == show:
        break

# we can use the evaluation routine provided in quapy to test our method using a given protocol in terms of
# one specific error metric
absolute_errors = qp.evaluation.evaluate(model=pacc, protocol=app, error_metric='ae')
print(f'MAE = {np.mean(absolute_errors):.4f}+-{np.std(absolute_errors):.4f}')
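
# several error metrics can be computed at once with qp.evaluation.evaluation_report, which returns a pandas
# DataFrame with one row per generated sample; a hedged sketch, mirroring the call used in the lequa2022
# experiments removed further below in this commit:
# report = qp.evaluation.evaluation_report(pacc, protocol=app, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)
# print(report.mean())
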
@@ -25,7 +25,9 @@ df = pd.DataFrame(columns=['method', 'dataset', 'MAE', 'MRAE', 'tr-time', 'te-ti

for dataset_name in tqdm(qp.datasets.UCI_BINARY_DATASETS, total=len(qp.datasets.UCI_BINARY_DATASETS)):
    if dataset_name in ['acute.a', 'acute.b', 'balance.2', 'iris.1']: continue
    if dataset_name in ['acute.a', 'acute.b', 'balance.2', 'iris.1']:
        # these datasets tend to produce either too good or too bad results...
        continue

    collection = qp.datasets.fetch_UCIBinaryLabelledCollection(dataset_name, verbose=False)
    train, test = collection.split_stratified()

@@ -13,7 +13,7 @@ $ pip install quapy[bayesian]

Running the script via:

```
$ python examples/bayesian_quantification.py
$ python examples/13.bayesian_quantification.py
```

will produce a plot `bayesian_quantification.pdf`.

@@ -33,7 +33,7 @@ returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with

Since we want to explore the losses, we will instead use newELM. For this example we will create a quantifier for tweet
sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary classifier,
our quantifier will be binary as well. We will use a one-vs-all approach to work in multiclass mode.
For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
For more details about how one-vs-all works, we refer to the example "10.one_vs_all.py" and to the API documentation.
"""

qp.environ['SAMPLE_SIZE'] = 100
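
As a complement to the docstring above, here is a minimal, hypothetical sketch of how a one-vs-all ELM quantifier could be assembled. The `loss` argument of `newELM` and the `OneVsAllAggregative` wrapper are assumptions and may differ from the actual API; SVMperf must also be available (e.g., via `qp.environ['SVMPERF_HOME']`).

```
from quapy.method.aggregative import newELM, OneVsAllAggregative  # assumed import locations

# a binary ELM quantifier that (assumedly) asks SVMperf to optimize the MAE loss
binary_quantifier = newELM(loss='mae')

# wrap it to handle the three sentiment classes via one-vs-all
quantifier = OneVsAllAggregative(binary_quantifier)

# the standard quapy API then applies: quantifier.fit(train), quantifier.quantify(X), ...
```
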
@@ -1,63 +0,0 @@
import numpy as np
from abstention.calibration import NoBiasVectorScaling, VectorScaling, TempScaling
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression
import quapy as qp
import quapy.functional as F
from classification.calibration import RecalibratedProbabilisticClassifierBase, NBVSCalibration, \
    BCTSCalibration
from data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
from evaluation import evaluation_report
from method.aggregative import EMQ
from model_selection import GridSearchQ
import pandas as pd

for task in ['T1A', 'T1B']:

    # calibration = TempScaling(verbose=False, bias_positions='all')

    qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]
    training, val_generator, test_generator = fetch_lequa2022(task=task)

    # define the quantifier
    # learner = BCTSCalibration(LogisticRegression(), n_jobs=-1)
    # learner = CalibratedClassifierCV(LogisticRegression())
    learner = LogisticRegression()
    quantifier = EMQ(classifier=learner)

    # model selection
    param_grid = {
        'classifier__C': np.logspace(-3, 3, 7),
        'classifier__class_weight': ['balanced', None],
        'recalib': ['platt', 'ts', 'vs', 'nbvs', 'bcts', None],
        'exact_train_prev': [False, True]
    }
    model_selection = GridSearchQ(quantifier, param_grid, protocol=val_generator, error='mrae', n_jobs=-1, refit=False, verbose=True)
    quantifier = model_selection.fit(training)

    # evaluation
    report = evaluation_report(quantifier, protocol=test_generator, error_metrics=['mae', 'mrae', 'mkld'], verbose=True)

    # import os
    # os.makedirs(f'./out', exist_ok=True)
    # with open(f'./out/EMQ_{calib}_{task}.txt', 'wt') as foo:
    #     estim_prev = report['estim-prev'].values
    #     nclasses = len(estim_prev[0])
    #     foo.write(f'id,'+','.join([str(x) for x in range(nclasses)])+'\n')
    #     for id, prev in enumerate(estim_prev):
    #         foo.write(f'{id},'+','.join([f'{p:.5f}' for p in prev])+'\n')
    #
    # #os.makedirs(f'./errors/{task}', exist_ok=True)
    # with open(f'./out/EMQ_{calib}_{task}_errors.txt', 'wt') as foo:
    #     maes, mraes = report['mae'].values, report['mrae'].values
    #     foo.write(f'id,AE,RAE\n')
    #     for id, (ae_i, rae_i) in enumerate(zip(maes, mraes)):
    #         foo.write(f'{id},{ae_i:.5f},{rae_i:.5f}\n')

    # printing results
    pd.set_option('display.expand_frame_repr', False)
    report['estim-prev'] = report['estim-prev'].map(F.strprev)
    print(report)

    print('Averaged values:')
    print(report.mean())

@@ -791,7 +791,7 @@ def fetch_lequa2022(task, data_home=None):

The datasets are downloaded only once, and stored for fast reuse.

See `lequa2022_experiments.py` provided in the example folder, which can serve as a guide on how to use these
See `4.lequa2022_experiments.py` provided in the example folder, which can serve as a guide on how to use these
datasets.
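
A brief usage sketch, assembled from the (renamed) example script referenced above and the basic example added in this commit; it is simplified here and meant as illustration rather than as the canonical experiment:

```
import numpy as np
import quapy as qp
from quapy.data.datasets import LEQUA2022_SAMPLE_SIZE, fetch_lequa2022
from quapy.method.aggregative import EMQ
from sklearn.linear_model import LogisticRegression

task = 'T1A'
qp.environ['SAMPLE_SIZE'] = LEQUA2022_SAMPLE_SIZE[task]

# training data plus generators of validation/test samples (usable as evaluation protocols)
training, val_generator, test_generator = fetch_lequa2022(task=task)

quantifier = EMQ(classifier=LogisticRegression())
quantifier.fit(training)

absolute_errors = qp.evaluation.evaluate(model=quantifier, protocol=test_generator, error_metric='ae')
print(f'MAE = {np.mean(absolute_errors):.4f}')
```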