bugfix in protocols, return_type='index' not working
parent 24c28edfd9
commit e6ae1e7d77

TODO.txt
@@ -0,0 +1,3 @@
- Test the return_type="index" in protocols and finish the "distributin_samples.py" example
- Add EDy (an implementation is available at quantificationlib)
-
@@ -33,10 +33,8 @@ import quapy.functional as F # <- this module has some functional utilities, li
print(f'training prevalence = {F.strprev(train.prevalence())}')

# let us train one quantifier, for example, PACC, using sklearn's LogisticRegression as the underlying classifier
# classifier = LogisticRegression()

# pacc = qp.method.aggregative.PACC(classifier)
pacc = qp.method.aggregative.PACC()
classifier = LogisticRegression()
pacc = qp.method.aggregative.PACC(classifier)

print(f'training {pacc}')
pacc.fit(train)

@@ -0,0 +1,38 @@
"""
Imagine we want to generate many samples out of a collection, and that we want to distribute them so that others can
run their own experiments on the very same test samples. One naive solution would come down to applying a given
protocol to our collection (say, the artificial prevalence protocol on the 'academic-success' UCI dataset), storing
all those samples on disk, and making them available online. Distributing that many samples, however, is undesirable.
In this example, we instead generate the indexes that allow anyone to regenerate the samples from the original
collection.
"""

import quapy as qp
from quapy.method.aggregative import PACC
from quapy.protocol import UPP

data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
train, test = data.train_test

# let us train a quantifier to check whether we can actually replicate the results
quantifier = PACC()
quantifier.fit(train)

# let us simulate our experimental results
protocol = UPP(test, sample_size=100, repeats=100, random_state=0)
our_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')

print(f'We have obtained a MAE={our_mae:.3f}')

# let us distribute the indexes; we specify that we want the indexes, not the samples
protocol = UPP(test, sample_size=100, repeats=100, random_state=0, return_type='index')
indexes = protocol.samples_parameters()

# Imagine we have distributed the indexes; we now show how anyone can replicate our experiments.
from quapy.protocol import ProtocolFromIndex
data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
train, test = data.train_test
protocol = ProtocolFromIndex(data=test, indexes=indexes)
their_mae = qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mae')

print(f'Another lab obtains a MAE={their_mae:.3f}')
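# note: since the indexes identify exactly the same test samples, their_mae should coincide with our_mae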

@@ -298,6 +298,31 @@ def nmd(prevs, prevs_hat):
    return (1./(n-1))*np.mean(match_distance(prevs, prevs_hat))


def bias_binary(prevs, prevs_hat):
    """
    Computes the (positive) bias in a binary problem. The bias is simply the difference between the
    predicted positive value and the true positive value; a positive value indicates that the prediction
    tends to overestimate the true value (positive bias), and a negative value indicates that it tends to
    underestimate it.
    :math:`bias(p,\\hat{p})=\\hat{p}_1-p_1`

    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
    :return: binary bias
    """
    assert prevs.shape[-1] == 2 and prevs_hat.shape[-1] == 2, f'bias_binary can only be applied to binary problems'
    return prevs_hat[...,1]-prevs[...,1]


def mean_bias_binary(prevs, prevs_hat):
    """
    Computes the mean of the (positive) bias in a binary problem.

    :param prevs: array-like of shape `(n_samples, n_classes,)` with the true prevalence values
    :param prevs_hat: array-like of shape `(n_samples, n_classes,)` with the predicted prevalence values
    :return: mean binary bias
    """
    return np.mean(bias_binary(prevs, prevs_hat))
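
A minimal usage sketch of the two new error functions (illustrative only; it assumes they are exposed in `quapy.error` as added above, and that prevalence arrays follow the usual `(n_samples, n_classes)` convention):

import numpy as np
from quapy.error import bias_binary, mean_bias_binary

prevs = np.asarray([[0.7, 0.3], [0.5, 0.5], [0.2, 0.8]])      # true prevalence values, one row per sample
prevs_hat = np.asarray([[0.6, 0.4], [0.4, 0.6], [0.1, 0.9]])  # predicted prevalence values

print(bias_binary(prevs, prevs_hat))       # per-sample bias, approx. [0.1, 0.1, 0.1] (positive class overestimated)
print(mean_bias_binary(prevs, prevs_hat))  # mean bias, approx. 0.1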


def md(prevs, prevs_hat, ERROR_TOL=1E-3):
    """
    Computes the Match Distance, under the assumption that the cost in mistaking class i with class i+1 is 1 in

@@ -338,8 +363,8 @@ def __check_eps(eps=None):

CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld}
QUANTIFICATION_ERROR = {mae, mnae, mrae, mnrae, mse, mkld, mnkld, mean_bias_binary}
QUANTIFICATION_ERROR_SINGLE = {ae, nae, rae, nrae, se, kld, nkld, bias_binary}
QUANTIFICATION_ERROR_SMOOTH = {kld, nkld, rae, nrae, mkld, mnkld, mrae}
CLASSIFICATION_ERROR_NAMES = {func.__name__ for func in CLASSIFICATION_ERROR}
QUANTIFICATION_ERROR_NAMES = {func.__name__ for func in QUANTIFICATION_ERROR}
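
Since `mean_bias_binary` and `bias_binary` are now registered in these sets, they should be resolvable by name like the other error metrics, e.g. `qp.evaluation.evaluate(quantifier, protocol=protocol, error_metric='mean_bias_binary')` on a binary dataset (assuming string metrics are looked up through the `*_ERROR_NAMES` sets, as for the existing errors).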

@@ -1,4 +1,6 @@
from copy import deepcopy
from typing import Iterable

import quapy as qp
import numpy as np
import itertools

@@ -62,6 +64,36 @@ class IterateProtocol(AbstractProtocol):
        return len(self.samples)


class ProtocolFromIndex(AbstractProtocol):
    """
    A protocol that regenerates samples from a given list of indexes.

    :param data: a :class:`quapy.data.base.LabelledCollection`
    :param indexes: a list of indexes
    """
    def __init__(self, data: LabelledCollection, indexes: Iterable):
        self.data = data
        self.indexes = indexes

    def __call__(self):
        """
        Yields one sample at a time, extracted using the indexes

        :return: yields one tuple `(sample, prev)` at a time, where `sample` is a set of instances
            and `prev` is an `np.ndarray` with the class prevalence values
        """
        for index in self.indexes:
            yield self.data.sampling_from_index(index).Xp

    def total(self):
        """
        Returns the number of samples in this protocol

        :return: int
        """
        return len(self.indexes)
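
For illustration, a minimal sketch of driving the new protocol with hand-picked indexes, assuming `test` is a `LabelledCollection` as in the example above:

indexes = [[0, 1, 2, 3], [2, 5, 7, 11]]        # each entry lists the positions that make up one sample
protocol = ProtocolFromIndex(data=test, indexes=indexes)
for instances, prev in protocol():
    print(instances.shape, prev)               # each sample comes with its class prevalence vector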


class AbstractStochasticSeededProtocol(AbstractProtocol):
    """
    An `AbstractStochasticSeededProtocol` is a protocol that generates, via any random procedure (e.g.,

@@ -124,9 +156,9 @@ class AbstractStochasticSeededProtocol(AbstractProtocol):
            if self.random_state is not None:
                stack.enter_context(qp.util.temp_seed(self.random_state))
            for params in self.samples_parameters():
                yield self.collator(self.sample(params))
                yield self.collator(self.sample(params), params)

    def collator(self, sample, *args):
    def collator(self, sample, params):
        """
        The collator prepares the sample to accommodate the desired output format before returning the output.
        This collator simply returns the sample as it is. Classes inheriting from this abstract class can

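The relevant change here is that the collator now receives the sample parameters (`params`) in addition to the sample itself; this is what allows the `'index'` collator below to return the sampling indexes rather than the materialized sample, which is the behaviour `return_type='index'` was missing.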

@@ -191,9 +223,11 @@ class OnLabelledCollectionProtocol:
        assert return_type in cls.RETURN_TYPES, \
            f'unknown return type passed as argument; valid ones are {cls.RETURN_TYPES}'
        if return_type=='sample_prev':
            return lambda lc:lc.Xp
            return lambda lc,params:lc.Xp
        elif return_type=='labelled_collection':
            return lambda lc:lc
            return lambda lc,params:lc
        elif return_type=='index':
            return lambda lc,params:params


class APP(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):