ipacc test

2021-06-22 08:21:41 +02:00 · 2021-06-22 08:21:41 +02:00 · fe8010978d
parent 986e61620c
commit fe8010978d
1 changed file with 67 additions and 4 deletions
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@ -316,16 +316,21 @@ class PACC(AggregativeProbabilisticQuantifier):
        self.pcc = PCC(self.learner)
        self.Pte_cond_estim_ = self.ProbConfusionTable(classes, y, y_)
        return self
    @classmethod
    def ProbConfusionTable(cls, classes, y, y_):
        """
        Estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
        document that belongs to yj ends up being classified as belonging to yi.

        :param classes: sequence of class labels; its order fixes the order of the rows/columns of the result
        :param y: array of labels, compared element-wise against each class to select rows of `y_`
        :param y_: array indexed by the boolean mask `y == class_` and averaged along axis 0; assumed to be
            2-d with one column per class so that each mean fits a row of the table -- TODO confirm with callers
        :return: an array of shape `(n_classes, n_classes)`; column j holds the mean of the rows of `y_` whose
            label in `y` equals `classes[j]`, or zeros if there are no such rows
        """
        n_classes = len(classes)
        confusion = np.empty(shape=(n_classes, n_classes))
        for i, class_ in enumerate(classes):
            sel = y_[y == class_]
            # a class that selects no rows gets an all-zero row; taking the mean of an empty selection
            # would fill the row with NaN instead
            confusion[i] = sel.mean(axis=0) if sel.size > 0 else 0

        # rows were filled per conditioning class; transpose so that the conditioning class indexes the columns
        return confusion.T
    def aggregate(self, classif_posteriors):
        prevs_estim = self.pcc.aggregate(classif_posteriors)
@ -391,6 +396,64 @@ class EMQ(AggregativeProbabilisticQuantifier):
        return qs, ps
 class IPACC(AggregativeProbabilisticQuantifier):
    """
    Experimental iterative quantifier built around an EM-style loop (see :meth:`EM`). At every iteration the
    posteriors are re-scaled by the ratio between the current prevalence estimate and the training prevalence,
    and the new prevalence estimate is the average of (a) the mean of the re-scaled posteriors and (b) the
    output of `ACC.solve_adjustment` applied to that mean and to a probabilistic confusion table computed
    from the same re-scaled posteriors.

    :param learner: the underlying classifier; `fit` passes it through `training_helper` with
        `ensure_probabilistic=True`
    """

    # NOTE(review): `EM` bounds its loop with `EMQ.MAX_ITER`, not with this constant, so this value is never
    # read inside this class. Confirm which cap is intended before relying on it.
    MAX_ITER = 2
    # default tolerance for the stopping criterion of `EM`
    EPSILON = 1e-2

    def __init__(self, learner: BaseEstimator):
        self.learner = learner

    def fit(self, data: LabelledCollection, fit_learner=True):
        """
        Prepare the learner and record the training prevalence.

        :param data: the training collection; its `labels` are used to compute the training prevalence
        :param fit_learner: forwarded to `training_helper`
        :return: self
        """
        self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
        # `classes_` is not defined in this class; presumably it is provided by the base class -- verify
        self.train_prevalence = F.prevalence_from_labels(data.labels, self.classes_)
        return self

    def aggregate(self, classif_posteriors, epsilon=EPSILON):
        """
        Run :meth:`EM` on the given posteriors and return the estimated prevalences.

        :param classif_posteriors: posterior probabilities, one row per instance
        :param epsilon: tolerance forwarded to :meth:`EM`
        :return: the prevalence estimate (first output of :meth:`EM`)
        """
        priors, _ = self.EM(self.train_prevalence, classif_posteriors, epsilon)
        return priors

    def predict_proba(self, instances, epsilon=EPSILON):
        """
        Classify the instances with the learner and return the posteriors as re-scaled by :meth:`EM`.

        :param instances: the instances handed to `learner.predict_proba`
        :param epsilon: tolerance forwarded to :meth:`EM`
        :return: the re-scaled posteriors (second output of :meth:`EM`)
        """
        classif_posteriors = self.learner.predict_proba(instances)
        _, posteriors = self.EM(self.train_prevalence, classif_posteriors, epsilon)
        return posteriors

    @classmethod
    def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON):
        """
        Iteratively refine a prevalence estimate, starting from the training prevalence.

        :param tr_prev: the training prevalence; it is copied, so the argument is not modified
        :param posterior_probabilities: 2-d array of posteriors (rows are normalized along axis 1 and the
            number of columns is taken as the number of classes)
        :param epsilon: the loop stops once `qp.error.mae` between two consecutive estimates falls below
            this value, but never before more than 10 iterations have been completed
        :return: a tuple `(qs, ps)` with the final prevalence estimate and the final re-scaled posteriors
        """
        Px = posterior_probabilities
        Ptr = np.copy(tr_prev)
        qs = np.copy(Ptr)  # qs (the running estimate) is initialized as the training prevalence
        # Fallback so that the return statement is well defined even if the loop body never runs
        # (i.e., if the iteration cap is not positive); in that case no re-scaling has been applied.
        ps = Px
        # classes are identified by their column index, which is what argmax returns in the M-step
        classes = np.arange(Px.shape[1])

        s, converged = 0, False
        qs_prev_ = None
        # NOTE(review): the cap comes from EMQ, not from this class' own MAX_ITER -- confirm this is intended
        while not converged and s < EMQ.MAX_ITER:
            # E-step: ps is Ps(y|xi)
            # NOTE(review): a zero entry in tr_prev makes this a division by zero -- confirm callers
            # never pass one
            ps_unnormalized = (qs / Ptr) * Px
            ps = ps_unnormalized / ps_unnormalized.sum(axis=1, keepdims=True)

            # M-step: the current most-likely labels play the role of the true labels when building the
            # probabilistic confusion table used for the adjustment
            y_belief = np.argmax(ps, axis=-1)
            p_conf_table = PACC.ProbConfusionTable(classes, y_belief, ps)
            pcc_estim = ps.mean(axis=0)
            pacc_estims = ACC.solve_adjustment(p_conf_table, pcc_estim)
            # the new estimate is the plain average of the unadjusted and the adjusted estimates
            qs = pcc_estim*0.5 + pacc_estims*0.5

            if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s > 10:
                converged = True

            qs_prev_ = qs
            s += 1

        if not converged:
            print('[warning] the method has reached the maximum number of iterations; it might have not converged')

        return qs, ps
 class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
    """
    Implementation of the method based on the Hellinger Distance y (HDy) proposed by