some refactoring made in order to accommodate OneVsAll to operate with aggregative probabilistic quantifiers; launching OneVsAll(HDy)
This commit is contained in:
parent
e2eb3b6f06
commit
b30c40b7a0
|
@ -1,6 +1,6 @@
|
|||
from sklearn.linear_model import LogisticRegression
|
||||
import quapy as qp
|
||||
from quapy.method.aggregative import OneVsAll
|
||||
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
|
||||
import quapy.functional as F
|
||||
import numpy as np
|
||||
import os
|
||||
|
@ -22,19 +22,26 @@ def quantification_models():
|
|||
__C_range = np.logspace(-4, 5, 10)
|
||||
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
|
||||
svmperf_params = {'C': __C_range}
|
||||
yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
|
||||
yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
|
||||
yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
|
||||
yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
|
||||
yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
|
||||
yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(args.svmperfpath)), svmperf_params
|
||||
yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(args.svmperfpath)), svmperf_params
|
||||
yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(args.svmperfpath)), svmperf_params
|
||||
yield 'svmmae', OneVsAll(qp.method.aggregative.SVMAE(args.svmperfpath)), svmperf_params
|
||||
yield 'svmmrae', OneVsAll(qp.method.aggregative.SVMRAE(args.svmperfpath)), svmperf_params
|
||||
|
||||
#sld = qp.method.aggregative.EMQ(newLR())
|
||||
#yield 'paccsld', qp.method.aggregative.PACC(sld), lr_params
|
||||
# methods tested in Gao & Sebastiani 2016
|
||||
yield 'cc', CC(newLR()), lr_params
|
||||
yield 'acc', ACC(newLR()), lr_params
|
||||
yield 'pcc', PCC(newLR()), lr_params
|
||||
yield 'pacc', PACC(newLR()), lr_params
|
||||
yield 'sld', EMQ(newLR()), lr_params
|
||||
yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
|
||||
yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
|
||||
yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
|
||||
|
||||
# methods added
|
||||
yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
|
||||
yield 'svmmrae', OneVsAll(SVMRAE(args.svmperfpath)), svmperf_params
|
||||
yield 'hdy', OneVsAll(HDy(newLR())), lr_params
|
||||
|
||||
# to add:
|
||||
# quapy
|
||||
# ensembles
|
||||
#
|
||||
|
||||
# 'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ from . import evaluation
|
|||
from . import plot
|
||||
from . import util
|
||||
from . import model_selection
|
||||
from quapy.method.aggregative import isaggregative, isprobabilistic
|
||||
from quapy.method.base import isprobabilistic, isaggregative
|
||||
|
||||
|
||||
environ = {
|
||||
|
@ -21,3 +21,5 @@ environ = {
|
|||
|
||||
def isbinary(x):
    """Return the value of the `binary` attribute exposed by `x`.

    The decision is delegated entirely to the object: whatever `x.binary` evaluates to is handed back
    unchanged.
    """
    binary_flag = x.binary
    return binary_flag
|
||||
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import quapy as qp
|
|||
from quapy.data import LabelledCollection
|
||||
from quapy.method.base import BaseQuantifier
|
||||
from quapy.util import temp_seed
|
||||
import quapy.functional as F
|
||||
|
||||
|
||||
def artificial_sampling_prediction(
|
||||
|
@ -39,18 +40,18 @@ def artificial_sampling_prediction(
|
|||
with temp_seed(random_seed):
|
||||
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
|
||||
|
||||
if isinstance(model, qp.method.aggregative.AggregativeQuantifier):
|
||||
# print('\tinstance of aggregative-quantifier')
|
||||
if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier):
|
||||
print('\tinstance of aggregative-quantifier')
|
||||
quantification_func = model.aggregate
|
||||
if isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
|
||||
# print('\t\tinstance of probabilitstic-aggregative-quantifier')
|
||||
if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
|
||||
print('\t\tinstance of probabilitstic-aggregative-quantifier')
|
||||
preclassified_instances = model.posterior_probabilities(test.instances)
|
||||
else:
|
||||
# print('\t\tinstance of hard-aggregative-quantifier')
|
||||
print('\t\tinstance of hard-aggregative-quantifier')
|
||||
preclassified_instances = model.classify(test.instances)
|
||||
test = LabelledCollection(preclassified_instances, test.labels)
|
||||
else:
|
||||
# print('\t\tinstance of base-quantifier')
|
||||
print('\t\tinstance of base-quantifier')
|
||||
quantification_func = model.quantify
|
||||
|
||||
def _predict_prevalences(index):
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from abc import abstractmethod
|
||||
from copy import deepcopy
|
||||
from typing import Union
|
||||
|
||||
import numpy as np
|
||||
from joblib import Parallel, delayed
|
||||
from sklearn.base import BaseEstimator
|
||||
|
@ -60,6 +59,10 @@ class AggregativeQuantifier(BaseQuantifier):
|
|||
def classes(self):
|
||||
return self.learner.classes_
|
||||
|
||||
@property
|
||||
def aggregative(self):
|
||||
return True
|
||||
|
||||
|
||||
class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
|
||||
"""
|
||||
|
@ -84,6 +87,9 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
|
|||
parameters={'base_estimator__'+k:v for k,v in parameters.items()}
|
||||
self.learner.set_params(**parameters)
|
||||
|
||||
@property
|
||||
def probabilistic(self):
|
||||
return True
|
||||
|
||||
|
||||
# Helper
|
||||
|
@ -385,6 +391,10 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
|
|||
Px = self.posterior_probabilities(validation.instances)[:,1] # takes only the P(y=+1|x)
|
||||
self.Pxy1 = Px[validation.labels == 1]
|
||||
self.Pxy0 = Px[validation.labels == 0]
|
||||
# pre-compute the histogram for positive and negative examples
|
||||
self.bins = np.linspace(10, 110, 11, dtype=int) #[10, 20, 30, ..., 100, 110]
|
||||
self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
|
||||
self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
|
||||
return self
|
||||
|
||||
def aggregate(self, classif_posteriors):
|
||||
|
@ -395,9 +405,12 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
|
|||
Px = classif_posteriors[:,1] # takes only the P(y=+1|x)
|
||||
|
||||
prev_estimations = []
|
||||
for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]
|
||||
Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
|
||||
Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
|
||||
#for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]
|
||||
#Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
|
||||
#Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
|
||||
for bins in self.bins:
|
||||
Pxy0_density = self.Pxy0_density[bins]
|
||||
Pxy1_density = self.Pxy1_density[bins]
|
||||
|
||||
Px_test, _ = np.histogram(Px, bins=bins, range=(0, 1), density=True)
|
||||
|
||||
|
@ -488,9 +501,7 @@ class OneVsAll(AggregativeQuantifier):
|
|||
assert isinstance(self.binary_quantifier, BaseQuantifier), \
|
||||
f'{self.binary_quantifier} does not seem to be a Quantifier'
|
||||
assert fit_learner==True, 'fit_learner must be True'
|
||||
if not isinstance(self.binary_quantifier, BinaryQuantifier):
|
||||
raise ValueError(f'{self.binary_quantifier.__class__.__name__} does not seem to be an instance of '
|
||||
f'{BinaryQuantifier.__class__.__name__}')
|
||||
|
||||
self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
|
||||
self.__parallel(self._delayed_binary_fit, data)
|
||||
return self
|
||||
|
@ -502,20 +513,39 @@ class OneVsAll(AggregativeQuantifier):
|
|||
classif_predictions_bin = self.__parallel(self._delayed_binary_classification, instances)
|
||||
return classif_predictions_bin.T
|
||||
|
||||
def posterior_probabilities(self, instances):
    """Collect the posterior probabilities issued by every per-class binary quantifier.

    The result is a matrix of shape (n,m,2), with n the number of instances and m the number of classes.
    Entry (i,j,1) (resp. (i,j,0)) is a value in [0,1] giving the posterior probability that instance i
    belongs (resp. does not belong) to class j. The per-class posteriors are independent of each other,
    so in general they do not sum up to one across classes.

    :param instances: the instances handed to each binary quantifier
    :return: the per-class results with the first two axes swapped, so that instances come first
    :raises NotImplementedError: if the base binary quantifier is not probabilistic
    """
    if self.binary_quantifier.probabilistic:
        per_class_posteriors = self.__parallel(self._delayed_binary_posteriors, instances)
        # the parallel call stacks one result per class; move the instance axis to the front
        return np.swapaxes(per_class_posteriors, 0, 1)

    raise NotImplementedError(f'{self.__class__.__name__} does not implement posterior_probabilities because '
                              f'the base quantifier {self.binary_quantifier.__class__.__name__} is not '
                              f'probabilistic')
|
||||
|
||||
def aggregate(self, classif_predictions_bin):
    """Aggregate the per-class binary predictions into a normalized vector of prevalences.

    :param classif_predictions_bin: ndarray with one row per document and one column per class. When the
        quantifier is probabilistic, every (document, class) entry holds 2 posterior probabilities, i.e. the
        array has 3 axes and its last axis has length 2; otherwise every entry is a hard prediction in {0,1}.
    :return: the per-class estimates produced by the binary quantifiers, after `F.normalize_prevalence`
    :raises AssertionError: if the array does not have the layout expected for the kind of quantifier
    """
    if self.probabilistic:
        # Check the rank before touching shape[2]: a 2-D matrix of hard predictions would otherwise fail
        # with an IndexError instead of the explanatory message below.
        assert classif_predictions_bin.ndim == 3 \
               and classif_predictions_bin.shape[1] == self.n_classes \
               and classif_predictions_bin.shape[2] == 2, \
            'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \
            'probabilities (2 dimensions) for each document (row) and class (columns)'
    else:
        assert set(np.unique(classif_predictions_bin)).issubset({0,1}), \
            'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
            'predictions for each document (row) and class (columns)'
    prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin)
    return F.normalize_prevalence(prevalences)
|
||||
|
||||
def quantify(self, X):
    """Estimate the class prevalences for the instances in `X`.

    The instances are first pre-classified (with posterior probabilities when the quantifier is
    probabilistic, with hard predictions otherwise) and the result is passed on to `aggregate`, so that
    quantification always goes through the same aggregation path.

    :param X: the instances to quantify
    :return: whatever `self.aggregate` returns for the pre-classified instances
    """
    # No early return through the per-class `quantify` calls: it would make this dispatch unreachable.
    predict = self.posterior_probabilities if self.probabilistic else self.classify
    return self.aggregate(predict(X))
|
||||
|
||||
def __parallel(self, func, *args, **kwargs):
|
||||
return np.asarray(
|
||||
|
@ -537,9 +567,12 @@ class OneVsAll(AggregativeQuantifier):
|
|||
def _delayed_binary_classification(self, c, X):
|
||||
return self.dict_binary_quantifiers[c].classify(X)
|
||||
|
||||
def _delayed_binary_quantify(self, c, X):
|
||||
def _delayed_binary_posteriors(self, c, X):
|
||||
return self.dict_binary_quantifiers[c].posterior_probabilities(X)
|
||||
|
||||
#def _delayed_binary_quantify(self, c, X):
|
||||
# the estimation for the positive class prevalence
|
||||
return self.dict_binary_quantifiers[c].quantify(X)[1]
|
||||
# return self.dict_binary_quantifiers[c].quantify(X)[1]
|
||||
|
||||
def _delayed_binary_aggregate(self, c, classif_predictions):
|
||||
# the estimation for the positive class prevalence
|
||||
|
@ -549,13 +582,14 @@ class OneVsAll(AggregativeQuantifier):
|
|||
bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
|
||||
self.dict_binary_quantifiers[c].fit(bindata)
|
||||
|
||||
@property
|
||||
def binary(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def probabilistic(self):
|
||||
return self.binary_quantifier.probabilistic
|
||||
|
||||
def isaggregative(model:BaseQuantifier):
|
||||
return isinstance(model, AggregativeQuantifier)
|
||||
|
||||
|
||||
def isprobabilistic(model:BaseQuantifier):
|
||||
return isinstance(model, AggregativeProbabilisticQuantifier)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -5,12 +5,10 @@ from quapy.data import LabelledCollection
|
|||
|
||||
# Base Quantifier abstract class
|
||||
# ------------------------------------
|
||||
|
||||
|
||||
class BaseQuantifier(metaclass=ABCMeta):
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, data): ...
|
||||
def fit(self, data: LabelledCollection): ...
|
||||
|
||||
@abstractmethod
|
||||
def quantify(self, instances): ...
|
||||
|
@ -21,10 +19,20 @@ class BaseQuantifier(metaclass=ABCMeta):
|
|||
@abstractmethod
|
||||
def get_params(self, deep=True): ...
|
||||
|
||||
# these properties allow meta-learners to reimplement the decision based on their constituents, and not
|
||||
# based on class structure
|
||||
@property
|
||||
def binary(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def aggregative(self):
|
||||
return False
|
||||
|
||||
@property
|
||||
def probabilistic(self):
|
||||
return False
|
||||
|
||||
|
||||
class BinaryQuantifier(BaseQuantifier):
|
||||
def _check_binary(self, data: LabelledCollection, quantifier_name):
|
||||
|
@ -40,7 +48,15 @@ def isbinary(model:BaseQuantifier):
|
|||
return model.binary
|
||||
|
||||
|
||||
# class OneVsAll(AggregativeQuantifier):
|
||||
def isaggregative(model:BaseQuantifier):
    """Return what `model` itself reports through its `aggregative` property.

    The answer is read from the instance, not derived from its class hierarchy.
    """
    aggregative_flag = model.aggregative
    return aggregative_flag
|
||||
|
||||
|
||||
def isprobabilistic(model:BaseQuantifier):
    """Return what `model` itself reports through its `probabilistic` property.

    The answer is read from the instance, not derived from its class hierarchy.
    """
    probabilistic_flag = model.probabilistic
    return probabilistic_flag
|
||||
|
||||
|
||||
# class OneVsAll:
|
||||
# """
|
||||
# Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
|
||||
# quantifier for each class, and then l1-normalizes the outputs so that the class prevalences sum up to 1.
|
||||
|
|
|
@ -152,6 +152,19 @@ class Ensemble(BaseQuantifier):
|
|||
order = np.argsort(dist)
|
||||
return select_k(predictions, order, k=self.red_size)
|
||||
|
||||
@property
|
||||
def binary(self):
|
||||
return self.base_quantifier.binary
|
||||
|
||||
@property
|
||||
def aggregative(self):
|
||||
raise NotImplementedError('aggregative functionality not yet supported for Ensemble')
|
||||
|
||||
@property
|
||||
def probabilistic(self):
|
||||
raise NotImplementedError('probabilistic functionality not yet supported for Ensemble')
|
||||
#return self.base_quantifier.probabilistic
|
||||
|
||||
|
||||
def get_probability_distribution(posterior_probabilities, bins=8):
|
||||
assert posterior_probabilities.shape[1]==2, 'the posterior probabilities do not seem to be for a binary problem'
|
||||
|
|
|
@ -157,7 +157,7 @@ class GridSearchQ(BaseQuantifier):
|
|||
model.fit(training)
|
||||
true_prevalences, estim_prevalences = artificial_sampling_prediction(
|
||||
model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
|
||||
verbose=False
|
||||
verbose=True
|
||||
)
|
||||
|
||||
score = self.error(true_prevalences, estim_prevalences)
|
||||
|
|
Loading…
Reference in New Issue