plot functionality added

This commit is contained in:
Alejandro Moreo Fernandez 2021-01-07 17:58:48 +01:00
parent 5894d46b31
commit d1b449d2e9
9 changed files with 318 additions and 50 deletions

48
plot_example.py Normal file
View File

@ -0,0 +1,48 @@
from sklearn.model_selection import GridSearchCV
import numpy as np
import quapy as qp
from sklearn.linear_model import LogisticRegression
# Sample size used by gen_data when evaluating each quantifier; the same value is also
# stored in quapy's environment dictionary under the 'SAMPLE_SIZE' key.
sample_size = 500
qp.environ['SAMPLE_SIZE'] = sample_size
def gen_data():
    """
    Trains every quantifier of the experiment on the 'kindle' reviews dataset and evaluates it on
    artificially drawn test samples.

    :return: four parallel lists with one entry per method: the method names, the true prevalences and the
        estimated prevalences (as returned by `artificial_sampling_prediction`), and the training prevalence
    """
    dataset = qp.datasets.fetch_reviews('kindle', tfidf=True, min_df=5)

    aggregative = qp.method.aggregative
    meta = qp.method.meta
    quantifier_classes = [
        aggregative.CC,
        aggregative.ACC,
        aggregative.PCC,
        aggregative.PACC,
        aggregative.HDy,
        aggregative.EMQ,
        meta.ECC,
        meta.EACC,
        meta.EHDy,
    ]

    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
    for quantifier_cls in quantifier_classes:
        name = quantifier_cls.__name__
        print(f'training {name}')

        # NOTE: the classifier could instead be wrapped in a GridSearchCV exploring C
        # (e.g., param_grid={'C': np.logspace(-3, 3, 7)}) in order to optimize it
        classifier = LogisticRegression(max_iter=1000, class_weight='balanced')
        quantifier = quantifier_cls(classifier).fit(dataset.training)

        true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
            quantifier, dataset.test, sample_size, n_repetitions=20, n_prevpoints=11
        )

        method_names.append(name)
        true_prevs.append(true_prev)
        estim_prevs.append(estim_prev)
        tr_prevs.append(dataset.training.prevalence())

    return method_names, true_prevs, estim_prevs, tr_prevs
# Obtain the evaluation results through qp.util.pickled_resource, handing gen_data over as the generation
# function (presumably the results are cached in the pickle file across runs -- confirm in qp.util)
method_names, true_prevs, estim_prevs, tr_prevs = qp.util.pickled_resource(
    './plots/plot_data.pkl', gen_data
)

# error as a function of the shift between the training prevalence and the test-sample prevalence
qp.plot.error_by_drift(
    method_names, true_prevs, estim_prevs, tr_prevs,
    n_bins=11,
    savepath='./plots/err_drift.png'
)

# estimated vs. true prevalence
qp.plot.binary_diagonal(
    method_names, true_prevs, estim_prevs,
    savepath='./plots/bin_diag.png'
)

# one box-plot of the bias per method
qp.plot.binary_bias_global(
    method_names, true_prevs, estim_prevs,
    savepath='./plots/bin_bias.png'
)

# box-plots of the bias per method, broken down by intervals of true prevalence
qp.plot.binary_bias_bins(
    method_names, true_prevs, estim_prevs,
    nbins=11,
    savepath='./plots/bin_bias_bin.png'
)

View File

@ -4,6 +4,8 @@ from . import functional
from . import method from . import method
from . import data from . import data
from . import evaluation from . import evaluation
from . import plot
from . import util
from method.aggregative import isaggregative, isprobabilistic from method.aggregative import isaggregative, isprobabilistic
@ -17,4 +19,4 @@ environ = {
def isbinary(x): def isbinary(x):
return data.isbinary(x) or method.aggregative.isbinary(x) return data.isbinary(x) or method.isbinary(x)

View File

@ -30,9 +30,9 @@ def artificial_sampling_prediction(
:param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect :param random_seed: allows to replicate the samplings. The seed is local to the method and does not affect
any other random process. any other random process.
:param verbose: if True, shows a progress bar :param verbose: if True, shows a progress bar
:return: two ndarrays of [m,n] with m the number of samples (n_prevpoints*n_repetitions) and n the :return: two ndarrays of shape (m,n) with m the number of samples (n_prevpoints*n_repetitions) and n the
number of classes. The first one contains the true prevalences for the samples generated while the second one number of classes. The first one contains the true prevalences for the samples generated while the second one
containing the the prevalences estimations contains the prevalence estimations
""" """
with temp_seed(random_seed): with temp_seed(random_seed):

View File

@ -5,13 +5,13 @@ from . import meta
AGGREGATIVE_METHODS = { AGGREGATIVE_METHODS = {
aggregative.ClassifyAndCount, aggregative.CC,
aggregative.AdjustedClassifyAndCount, aggregative.ACC,
aggregative.ProbabilisticClassifyAndCount, aggregative.PCC,
aggregative.ProbabilisticAdjustedClassifyAndCount, aggregative.PACC,
aggregative.ExplicitLossMinimisation, aggregative.ELM,
aggregative.ExpectationMaximizationQuantifier, aggregative.EMQ,
aggregative.HellingerDistanceY aggregative.HDy
} }
NON_AGGREGATIVE_METHODS = { NON_AGGREGATIVE_METHODS = {

View File

@ -1,5 +1,6 @@
import numpy as np import numpy as np
from copy import deepcopy from copy import deepcopy
from sklearn.base import BaseEstimator, clone
import functional as F import functional as F
import error import error
from method.base import BaseQuantifier, BinaryQuantifier from method.base import BaseQuantifier, BinaryQuantifier
@ -130,13 +131,13 @@ def training_helper(learner,
# Methods # Methods
# ------------------------------------ # ------------------------------------
class ClassifyAndCount(AggregativeQuantifier): class CC(AggregativeQuantifier):
""" """
The most basic Quantification method. One that simply classifies all instances and counts how many have been The most basic Quantification method. One that simply classifies all instances and counts how many have been
attributed each of the classes in order to compute class prevalence estimates. attributed each of the classes in order to compute class prevalence estimates.
""" """
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True): def fit(self, data: LabelledCollection, fit_learner=True):
@ -153,9 +154,9 @@ class ClassifyAndCount(AggregativeQuantifier):
return F.prevalence_from_labels(classif_predictions, self.n_classes) return F.prevalence_from_labels(classif_predictions, self.n_classes)
class AdjustedClassifyAndCount(AggregativeQuantifier): class ACC(AggregativeQuantifier):
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3): def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@ -169,7 +170,7 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
:return: self :return: self
""" """
self.learner, validation = training_helper(self.learner, data, fit_learner, val_split=val_split) self.learner, validation = training_helper(self.learner, data, fit_learner, val_split=val_split)
self.cc = ClassifyAndCount(self.learner) self.cc = CC(self.learner)
y_ = self.classify(validation.instances) y_ = self.classify(validation.instances)
y = validation.labels y = validation.labels
# estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
@ -182,7 +183,7 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
def aggregate(self, classif_predictions): def aggregate(self, classif_predictions):
prevs_estim = self.cc.aggregate(classif_predictions) prevs_estim = self.cc.aggregate(classif_predictions)
return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim) return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
@classmethod @classmethod
def solve_adjustment(cls, PteCondEstim, prevs_estim): def solve_adjustment(cls, PteCondEstim, prevs_estim):
@ -198,8 +199,8 @@ class AdjustedClassifyAndCount(AggregativeQuantifier):
return adjusted_prevs return adjusted_prevs
class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier): class PCC(AggregativeProbabilisticQuantifier):
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data : LabelledCollection, fit_learner=True): def fit(self, data : LabelledCollection, fit_learner=True):
@ -210,9 +211,9 @@ class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
return F.prevalence_from_probabilities(classif_posteriors, binarize=False) return F.prevalence_from_probabilities(classif_posteriors, binarize=False)
class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier): class PACC(AggregativeProbabilisticQuantifier):
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3): def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@ -228,7 +229,7 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
self.learner, validation = training_helper( self.learner, validation = training_helper(
self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split self.learner, data, fit_learner, ensure_probabilistic=True, val_split=val_split
) )
self.pcc = ProbabilisticClassifyAndCount(self.learner) self.pcc = PCC(self.learner)
y_ = self.soft_classify(validation.instances) y_ = self.soft_classify(validation.instances)
y = validation.labels y = validation.labels
confusion = np.empty(shape=(data.n_classes, data.n_classes)) confusion = np.empty(shape=(data.n_classes, data.n_classes))
@ -246,7 +247,7 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
def aggregate(self, classif_posteriors): def aggregate(self, classif_posteriors):
prevs_estim = self.pcc.aggregate(classif_posteriors) prevs_estim = self.pcc.aggregate(classif_posteriors)
return AdjustedClassifyAndCount.solve_adjustment(self.Pte_cond_estim_, prevs_estim) return ACC.solve_adjustment(self.Pte_cond_estim_, prevs_estim)
def classify(self, data): def classify(self, data):
return self.pcc.classify(data) return self.pcc.classify(data)
@ -255,12 +256,12 @@ class ProbabilisticAdjustedClassifyAndCount(AggregativeProbabilisticQuantifier):
return self.pcc.posterior_probabilities(data) return self.pcc.posterior_probabilities(data)
class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier): class EMQ(AggregativeProbabilisticQuantifier):
MAX_ITER = 1000 MAX_ITER = 1000
EPSILON = 1e-4 EPSILON = 1e-4
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True): def fit(self, data: LabelledCollection, fit_learner=True):
@ -279,7 +280,7 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
s, converged = 0, False s, converged = 0, False
qs_prev_ = None qs_prev_ = None
while not converged and s < ExpectationMaximizationQuantifier.MAX_ITER: while not converged and s < EMQ.MAX_ITER:
# E-step: ps is Ps(y=+1|xi) # E-step: ps is Ps(y=+1|xi)
ps_unnormalized = (qs / Ptr) * Px ps_unnormalized = (qs / Ptr) * Px
ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1,1) ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1,1)
@ -299,14 +300,14 @@ class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
return qs return qs
class HellingerDistanceY(AggregativeProbabilisticQuantifier, BinaryQuantifier): class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
""" """
Implementation of the method based on the Hellinger Distance y (HDy) proposed by Implementation of the method based on the Hellinger Distance y (HDy) proposed by
González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution González-Castro, V., Alaiz-Rodrı́guez, R., and Alegre, E. (2013). Class distribution
estimation based on the Hellinger distance. Information Sciences, 218:146–164. estimation based on the Hellinger distance. Information Sciences, 218:146–164.
""" """
def __init__(self, learner): def __init__(self, learner:BaseEstimator):
self.learner = learner self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3): def fit(self, data: LabelledCollection, fit_learner=True, val_split:Union[float, LabelledCollection]=0.3):
@ -353,7 +354,7 @@ class HellingerDistanceY(AggregativeProbabilisticQuantifier, BinaryQuantifier):
return np.asarray([1-pos_class_prev, pos_class_prev]) return np.asarray([1-pos_class_prev, pos_class_prev])
class ExplicitLossMinimisation(AggregativeQuantifier, BinaryQuantifier): class ELM(AggregativeQuantifier, BinaryQuantifier):
def __init__(self, svmperf_base, loss, **kwargs): def __init__(self, svmperf_base, loss, **kwargs):
self.svmperf_base = svmperf_base self.svmperf_base = svmperf_base
@ -374,38 +375,38 @@ class ExplicitLossMinimisation(AggregativeQuantifier, BinaryQuantifier):
class SVMQ(ExplicitLossMinimisation): class SVMQ(ELM):
def __init__(self, svmperf_base, **kwargs): def __init__(self, svmperf_base, **kwargs):
super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs) super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
class SVMKLD(ExplicitLossMinimisation): class SVMKLD(ELM):
def __init__(self, svmperf_base, **kwargs): def __init__(self, svmperf_base, **kwargs):
super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs) super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
class SVMNKLD(ExplicitLossMinimisation): class SVMNKLD(ELM):
def __init__(self, svmperf_base, **kwargs): def __init__(self, svmperf_base, **kwargs):
super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs) super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
class SVMAE(ExplicitLossMinimisation): class SVMAE(ELM):
def __init__(self, svmperf_base, **kwargs): def __init__(self, svmperf_base, **kwargs):
super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs) super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
class SVMRAE(ExplicitLossMinimisation): class SVMRAE(ELM):
def __init__(self, svmperf_base, **kwargs): def __init__(self, svmperf_base, **kwargs):
super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs) super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
CC = ClassifyAndCount ClassifyAndCount = CC
ACC = AdjustedClassifyAndCount AdjustedClassifyAndCount = ACC
PCC = ProbabilisticClassifyAndCount ProbabilisticClassifyAndCount = PCC
PACC = ProbabilisticAdjustedClassifyAndCount ProbabilisticAdjustedClassifyAndCount = PACC
ELM = ExplicitLossMinimisation ExplicitLossMinimisation = ELM
EMQ = ExpectationMaximizationQuantifier ExpectationMaximizationQuantifier = EMQ
HDy = HellingerDistanceY HellingerDistanceY = HDy
class OneVsAll(AggregativeQuantifier): class OneVsAll(AggregativeQuantifier):
@ -414,7 +415,7 @@ class OneVsAll(AggregativeQuantifier):
quantifier for each class, and then l1-normalizes the outputs so that the class prevalences sum up to 1. quantifier for each class, and then l1-normalizes the outputs so that the class prevalences sum up to 1.
This variant was used, along with the ExplicitLossMinimization quantifier in This variant was used, along with the ExplicitLossMinimization quantifier in
Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis. Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
Social Network Analysis and Mining6(19), 122 (2016) Social Network Analysis and Mining 6(19), 1–22 (2016)
""" """
def __init__(self, binary_quantifier, n_jobs=-1): def __init__(self, binary_quantifier, n_jobs=-1):
@ -484,15 +485,14 @@ class OneVsAll(AggregativeQuantifier):
self.dict_binary_quantifiers[c].fit(bindata) self.dict_binary_quantifiers[c].fit(bindata)
def isaggregative(model): def isaggregative(model:BaseQuantifier):
return isinstance(model, AggregativeQuantifier) return isinstance(model, AggregativeQuantifier)
def isprobabilistic(model): def isprobabilistic(model:BaseQuantifier):
return isinstance(model, AggregativeProbabilisticQuantifier) return isinstance(model, AggregativeProbabilisticQuantifier)
def isbinary(model):
return isinstance(model, BinaryQuantifier)

View File

@ -20,12 +20,24 @@ class BaseQuantifier(metaclass=ABCMeta):
@abstractmethod @abstractmethod
def get_params(self, deep=True): ... def get_params(self, deep=True): ...
@property
def binary(self):
return False
class BinaryQuantifier(BaseQuantifier): class BinaryQuantifier(BaseQuantifier):
def _check_binary(self, data: LabelledCollection, quantifier_name): def _check_binary(self, data: LabelledCollection, quantifier_name):
assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \ assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \
f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.' f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'
@property
def binary(self):
return True
def isbinary(model:BaseQuantifier):
return model.binary
# class OneVsAll(AggregativeQuantifier): # class OneVsAll(AggregativeQuantifier):
# """ # """

View File

@ -76,11 +76,11 @@ class QuaNetTrainer(BaseQuantifier):
self.tr_prev = data.prevalence() self.tr_prev = data.prevalence()
self.quantifiers = { self.quantifiers = {
'cc': ClassifyAndCount(self.learner).fit(data, fit_learner=False), 'cc': CC(self.learner).fit(data, fit_learner=False),
'acc': AdjustedClassifyAndCount(self.learner).fit(data, fit_learner=False), 'acc': ACC(self.learner).fit(data, fit_learner=False),
'pcc': ProbabilisticClassifyAndCount(self.learner).fit(data, fit_learner=False), 'pcc': PCC(self.learner).fit(data, fit_learner=False),
'pacc': ProbabilisticAdjustedClassifyAndCount(self.learner).fit(data, fit_learner=False), 'pacc': PACC(self.learner).fit(data, fit_learner=False),
'emq': ExpectationMaximizationQuantifier(self.learner).fit(data, fit_learner=False), 'emq': EMQ(self.learner).fit(data, fit_learner=False),
} }
self.status = { self.status = {

202
quapy/plot.py Normal file
View File

@ -0,0 +1,202 @@
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import quapy as qp
# Global matplotlib defaults set when this module is imported: figure size and resolution (dpi)
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'figure.dpi': 200,
})
def binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
    """
    Plots, for each method, the estimated prevalence of one class against its true prevalence. The dashed
    diagonal is the ideal quantifier; each method is drawn as the mean estimate for every distinct true
    prevalence value, surrounded by a band of one standard deviation.

    :param method_names: labels of the methods, one per method
    :param true_prevs: one array per method, indexed as [sample, class], with the true prevalences
    :param estim_prevs: one array per method, indexed as [sample, class], with the estimated prevalences
    :param pos_class: index of the class (column) that is plotted
    :param title: title of the plot
    :param savepath: path where the figure is saved; if None, the figure is shown instead
    """
    _, ax = plt.subplots()
    ax.set_aspect('equal')
    ax.grid()
    ax.plot([0, 1], [0, 1], '--k', label='ideal', zorder=1)

    for name, true_i, estim_i in zip(method_names, true_prevs, estim_prevs):
        true_pos = true_i[:, pos_class]
        estim_pos = estim_i[:, pos_class]

        # np.unique already returns the distinct values in ascending order
        distinct_prevs = np.unique(true_pos)
        estimates_by_prev = [estim_pos[true_pos == prev] for prev in distinct_prevs]
        means = np.asarray([estimates.mean() for estimates in estimates_by_prev])
        stds = np.asarray([estimates.std() for estimates in estimates_by_prev])

        ax.errorbar(distinct_prevs, means, fmt='-', marker='o', label=name, markersize=3, zorder=2)
        ax.fill_between(distinct_prevs, means - stds, means + stds, alpha=0.25)

    ax.set(xlabel='true prevalence', ylabel='estimated prevalence', title=title)
    ax.set_ylim(0, 1)
    ax.set_xlim(0, 1)

    # shrink the axes to 80% of their width so that the legend fits to the right of the plot
    area = ax.get_position()
    ax.set_position([area.x0, area.y0, area.width * 0.8, area.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    save_or_show(savepath)
def binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title=None, savepath=None):
    """
    Draws one box-plot per method with the distribution of the signed error (estimated minus true
    prevalence) of one class across all samples.

    :param method_names: labels of the methods, one per method
    :param true_prevs: one array per method, indexed as [sample, class], with the true prevalences
    :param estim_prevs: one array per method, indexed as [sample, class], with the estimated prevalences
    :param pos_class: index of the class (column) whose bias is plotted
    :param title: title of the plot
    :param savepath: path where the figure is saved; if None, the figure is shown instead
    """
    _, ax = plt.subplots()
    ax.grid()

    # materialize the triples once, so that labels and biases stay aligned (and truncated alike)
    triples = list(zip(method_names, true_prevs, estim_prevs))
    labels = [name for name, _, _ in triples]
    biases = [estim_i[:, pos_class] - true_i[:, pos_class] for _, true_i, estim_i in triples]

    ax.boxplot(biases, labels=labels, patch_artist=False, showmeans=True)
    ax.set(ylabel='error bias', title=title)

    save_or_show(savepath)
def binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title=None, nbins=21, colormap=cm.tab10,
                     vertical_xticks=False, savepath=None):
    """
    Draws box-plots of the signed error (estimated minus true prevalence) of one class, broken down by
    intervals of true prevalence; within each interval there is one box per method.

    :param method_names: labels of the methods, one per method
    :param true_prevs: one array per method, indexed as [sample, class], with the true prevalences
    :param estim_prevs: one array per method, indexed as [sample, class], with the estimated prevalences
    :param pos_class: index of the class (column) whose bias is plotted
    :param title: title of the plot
    :param nbins: number of evenly spaced edges in [0,1]; the plot thus displays nbins-1 intervals
    :param colormap: a colormap exposing a list of colors in its attribute `colors`; colors are assigned to
        methods by position, and are reused cyclically if there are more methods than colors
    :param vertical_xticks: whether to write the interval labels vertically
    :param savepath: path where the figure is saved; if None, the figure is shown instead
    """
    fig, ax = plt.subplots()
    ax.grid()

    bins = np.linspace(0, 1, nbins)
    n_intervals = nbins - 1
    binwidth = 1/n_intervals
    data = {}
    for method, true_prev, estim_prev in zip(method_names, true_prevs, estim_prevs):
        true_prev = true_prev[:, pos_class]
        estim_prev = estim_prev[:, pos_class]

        # Interval i is [bins[i], bins[i+1]), exactly as announced by the tick labels. The rounding absorbs
        # floating-point noise, so that a prevalence lying on an edge is not pushed into the interval below;
        # the clipping makes the last interval also take the samples with prevalence 1 (instead of leaving
        # them out of the plot).
        scaled = np.round(true_prev * n_intervals, decimals=8)
        inds = np.clip(np.floor(scaled).astype(int), 0, n_intervals - 1)

        data[method] = []
        for ind in range(n_intervals):
            selected = inds == ind
            data[method].append(estim_prev[selected] - true_prev[selected])

    nmethods = len(method_names)
    boxwidth = binwidth/(nmethods+1)
    palette = colormap.colors
    for i, left_edge in enumerate(bins[:-1]):
        boxdata = [data[method][i] for method in method_names]
        # the boxes of the different methods are laid side by side within the interval
        positions = [left_edge + (j*boxwidth) + boxwidth for j in range(nmethods)]
        box = ax.boxplot(boxdata, showmeans=False, positions=positions, widths=boxwidth, sym='+',
                         patch_artist=True)
        for boxid in range(nmethods):
            c = palette[boxid % len(palette)]
            plt.setp(box['fliers'][boxid], color=c, marker='+', markersize=3., markeredgecolor=c)
            plt.setp(box['boxes'][boxid], color=c)
            plt.setp(box['medians'][boxid], color='k')

    # major ticks (unlabelled) delimit the intervals; minor ticks, centered in each interval, carry the label
    major_xticks_positions = list(bins[:-1])
    minor_xticks_positions = [left_edge + binwidth / 2 for left_edge in bins[:-1]]
    major_xticks_labels = [''] * n_intervals
    minor_xticks_labels = [f'[{low:.2f}-{high:.2f})' for low, high in zip(bins[:-1], bins[1:])]
    ax.set_xticks(major_xticks_positions)
    ax.set_xticks(minor_xticks_positions, minor=True)
    ax.set_xticklabels(major_xticks_labels)
    ax.set_xticklabels(minor_xticks_labels, minor=True, rotation='vertical' if vertical_xticks else 'horizontal')

    if vertical_xticks:
        # Pad margins so that markers don't get clipped by the axes
        plt.margins(0.2)
        # Tweak spacing to prevent clipping of tick-labels
        plt.subplots_adjust(bottom=0.15)

    # The legend is built from the list of handles hs, initialized with the "ideal" quantifier (one that has 0
    # bias across all intervals, i.e., a line from (0,0) to (1,0)). The remaining handles are dummy labelled
    # plots, one per method, that are hidden once the legend has been placed
    hs = [ax.plot([0, 1], [0, 0], '-k', zorder=2)[0]]
    for colorid in range(nmethods):
        c = palette[colorid % len(palette)]
        h, = ax.plot([1, 1], '-s', markerfacecolor=c, color='k', mec=c, linewidth=1.)
        hs.append(h)
    # shrink the axes to 80% of their width so that the legend fits to the right of the plot
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(hs, ['ideal']+method_names, loc='center left', bbox_to_anchor=(1, 0.5))
    for h in hs[1:]:
        h.set_visible(False)

    # x-axis and y-axis labels and limits
    ax.set(xlabel='prevalence', ylabel='error bias', title=title)
    ax.set_xlim(0, 1)

    save_or_show(savepath)
def error_by_drift(method_names, true_prevs, estim_prevs, tr_prevs, n_bins=21, error_name='ae', show_std=True,
                   title='Quantification error as a function of distribution shift',
                   savepath=None):
    """
    Plots the quantification error of each method as a function of the shift between the training
    prevalence and the prevalence of the test samples. The shift is measured with `qp.error.ae`; the samples
    are grouped in bins of shift, and the mean error (optionally with a band of one standard deviation) is
    drawn for every non-empty bin, at the position of the upper edge of the bin.

    :param method_names: labels of the methods; entries sharing the same label are merged into one series
    :param true_prevs: one array per entry, indexed as [sample, class], with the true prevalences
    :param estim_prevs: one array per entry, indexed as [sample, class], with the estimated prevalences
    :param tr_prevs: one array per entry with the training prevalence
    :param n_bins: number of evenly spaced bin edges in [0,1]
    :param error_name: name of the function in `qp.error` used to compute the error shown in the y-axis
    :param show_std: whether to draw the band of one standard deviation
    :param title: title of the plot
    :param savepath: path where the figure is saved; if None, the figure is shown instead
    """
    _, ax = plt.subplots()
    ax.grid()

    x_error = qp.error.ae
    y_error = getattr(qp.error, error_name)

    # join all data, and keep the order in which the methods appeared for the first time
    data = defaultdict(lambda: {'x': np.empty(shape=(0)), 'y': np.empty(shape=(0))})
    method_order = []
    for method, test_prevs_i, estim_prevs_i, tr_prev_i in zip(method_names, true_prevs, estim_prevs, tr_prevs):
        # the training prevalence is replicated so as to have one row per test sample
        tr_prev_i = np.repeat(tr_prev_i.reshape(1, -1), repeats=test_prevs_i.shape[0], axis=0)

        tr_test_drifts = x_error(test_prevs_i, tr_prev_i)
        data[method]['x'] = np.concatenate([data[method]['x'], tr_test_drifts])

        method_drifts = y_error(test_prevs_i, estim_prevs_i)
        data[method]['y'] = np.concatenate([data[method]['y'], method_drifts])

        if method not in method_order:
            method_order.append(method)

    bins = np.linspace(0, 1, n_bins)
    binwidth = 1 / (n_bins - 1)
    min_x, max_x = None, None
    for method in method_order:
        tr_test_drifts = data[method]['x']
        method_drifts = data[method]['y']

        inds = np.digitize(tr_test_drifts, bins, right=True)
        xs, ys, ystds = [], [], []
        for ind in range(len(bins)):
            selected = inds == ind
            if selected.any():
                xs.append(ind*binwidth)
                ys.append(np.mean(method_drifts[selected]))
                ystds.append(np.std(method_drifts[selected]))

        if not xs:
            # no sample of this method falls in any bin: there is nothing to draw (and min/max would fail)
            continue

        xs = np.asarray(xs)
        ys = np.asarray(ys)
        ystds = np.asarray(ystds)

        # track the range of shift covered by all methods, later used as the limits of the x-axis
        min_x = xs.min() if min_x is None else min(min_x, xs.min())
        max_x = xs.max() if max_x is None else max(max_x, xs.max())

        ax.errorbar(xs, ys, fmt='-', marker='o', label=method, markersize=3, zorder=2)
        if show_std:
            ax.fill_between(xs, ys-ystds, ys+ystds, alpha=0.25)

    ax.set(xlabel='Distribution shift between training set and test sample',
           ylabel=f'{error_name.upper()} (true distribution, predicted distribution)',
           title=title)
    # shrink the axes to 80% of their width so that the legend fits to the right of the plot
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlim(min_x, max_x)

    save_or_show(savepath)
def save_or_show(savepath):
    """
    Saves the current figure in `savepath` or, if no path is given, shows it.

    :param savepath: path of the image file to write; if None, the figure is shown instead
    """
    if savepath is None:
        plt.show()
        return

    # make sure the destination folder exists before writing the image
    qp.util.create_parent_dir(savepath)
    plt.savefig(savepath)

View File

@ -64,6 +64,10 @@ def get_quapy_home():
return home return home
def create_parent_dir(path):
    """
    Creates the directory that contains `path`, along with any missing ancestor; nothing happens if the
    directory already exists. The file `path` itself is not created.

    :param path: path of a file whose parent directory has to exist
    """
    parent_dir = Path(path).parent
    os.makedirs(parent_dir, exist_ok=True)
def pickled_resource(pickle_path:str, generation_func:callable, *args): def pickled_resource(pickle_path:str, generation_func:callable, *args):
if pickle_path is None: if pickle_path is None:
return generation_func(*args) return generation_func(*args)