many aggregative methods added

Alejandro Moreo Fernandez 2020-12-03 18:12:28 +01:00
parent 78988c87f9
commit a882424eeb
10 changed files with 1095 additions and 1 deletion


@ -1,2 +1,3 @@
Documentation with sphinx
The parallel training in svmperf seems not to work
Add "prepare svmperf for quantification" script

28
prepare_svmperf.sh Executable file

@ -0,0 +1,28 @@
#!/bin/bash
set -x
URL=http://download.joachims.org/svm_perf/current/svm_perf.tar.gz
FILE=./svm_perf.tar.gz
wget $URL -O $FILE
mkdir ./svm_perf
tar xvzf $FILE -C ./svm_perf
rm $FILE
# to create the patch [for my own use]
#diff -Naur svm_perf svm_perf_quantification > svm-perf-quantification-ext.patch
# to create the modification
#cp svm_perf svm_perf_quantification -r [THIS IS NOT NEEDED]
patch -s -p0 < svm-perf-quantification-ext.patch
mv svm_perf svm_perf_quantification
cd svm_perf_quantification
make

6
quapy/__init__.py Normal file

@ -0,0 +1,6 @@
from .dataset import *
from . import functional
from . import method
from . import error

47
quapy/error.py Normal file

@ -0,0 +1,47 @@
from sklearn.metrics import f1_score
from settings import SAMPLE_SIZE
def f1e(y_true, y_pred):
return 1. - f1_score(y_true, y_pred, average='macro')
def acce(y_true, y_pred):
acc = (y_true == y_pred).mean()
return 1. - acc
def mae(prevs, prevs_hat):
return ae(prevs, prevs_hat).mean()
def ae(p, p_hat):
assert p.shape == p_hat.shape, 'wrong shape'
return abs(p_hat-p).mean(axis=-1)
def mrae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
return rae(p, p_hat, eps).mean()
def rae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
p = smooth(p, eps)
p_hat = smooth(p_hat, eps)
return (abs(p-p_hat)/p).mean(axis=-1)
def smooth(p, eps):
n_classes = p.shape[-1]
return (p+eps)/(eps*n_classes + 1)
CLASSIFICATION_ERROR = {f1e, acce}
QUANTIFICATION_ERROR = {mae, mrae}
f1_error = f1e
acc_error = acce
mean_absolute_error = mae
absolute_error = ae
mean_relative_absolute_error = mrae
relative_absolute_error = rae
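
A quick usage sketch for these error measures (illustrative values only; note the module reads SAMPLE_SIZE from a settings module at import time, so that must be resolvable):

import numpy as np
from quapy.error import ae, mae, acce

# true and estimated class prevalences for two samples (each row sums to 1)
prevs = np.asarray([[0.8, 0.2], [0.5, 0.5]])
prevs_hat = np.asarray([[0.6, 0.4], [0.5, 0.5]])
print(ae(prevs, prevs_hat))   # per-sample absolute error: [0.2, 0.0]
print(mae(prevs, prevs_hat))  # mean absolute error: 0.1

# classification error on label vectors
y_true = np.asarray([1, 0, 1, 1])
y_pred = np.asarray([1, 0, 0, 1])
print(acce(y_true, y_pred))   # 1 - accuracy = 0.25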

49
quapy/functional.py Normal file

@ -0,0 +1,49 @@
from collections import defaultdict
import numpy as np
import itertools
def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
s = np.linspace(0., 1., n_prevalences, endpoint=True)
s = [s] * (dimensions - 1)
prevs = [p for p in itertools.product(*s, repeat=1) if sum(p)<=1]
if return_constrained_dim:
prevs = [p+(1-sum(p),) for p in prevs]
prevs = np.asarray(prevs).reshape(len(prevs), -1)
if repeat>1:
prevs = np.repeat(prevs, repeat, axis=0)
return prevs
def prevalence_from_labels(labels, n_classes):
unique, counts = np.unique(labels, return_counts=True)
by_class = defaultdict(lambda:0, dict(zip(unique, counts)))
prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=float)
prevalences /= prevalences.sum()
return prevalences
def prevalence_from_probabilities(posteriors, binarize: bool = False):
if binarize:
predictions = np.argmax(posteriors, axis=-1)
return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
else:
prevalences = posteriors.mean(axis=0)
prevalences /= prevalences.sum()
return prevalences
def strprev(prevalences, prec=3):
return '['+ ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'
def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
den = tpr - fpr
if den == 0:
den += 1e-8
adjusted = (prevalence_estim - fpr) / den
if clip:
adjusted = np.clip(adjusted, 0., 1.)
return adjusted
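
An illustrative sketch of these helpers (assuming the package imports resolve; the grid sizes and values follow directly from the definitions above):

import quapy.functional as F

# prevalence grid for 3 classes: all (p1, p2) on a step-0.25 grid with p1+p2 <= 1;
# return_constrained_dim=True appends the dependent dimension p3 = 1-p1-p2
grid = F.artificial_prevalence_sampling(dimensions=3, n_prevalences=5, return_constrained_dim=True)
print(grid.shape)          # (15, 3)
print(F.strprev(grid[0]))  # [0.000, 0.000, 1.000]

# adjusted count: correct a CC estimate given the classifier's tpr and fpr
print(F.adjusted_quantification(0.42, tpr=0.9, fpr=0.1))  # (0.42-0.1)/(0.9-0.1) = 0.4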

30
quapy/method/__init__.py Normal file

@ -0,0 +1,30 @@
from . import aggregative as agg
from . import non_aggregative as nagg
AGGREGATIVE_METHODS = {
agg.ClassifyAndCount,
agg.AdjustedClassifyAndCount,
agg.ProbabilisticClassifyAndCount,
agg.ProbabilisticAdjustedClassifyAndCount,
agg.ExplicitLossMinimisation,
agg.ExpectationMaximizationQuantifier,
}
NON_AGGREGATIVE_METHODS = {
nagg.MaximumLikelihoodPrevalenceEstimation
}
QUANTIFICATION_METHODS = AGGREGATIVE_METHODS | NON_AGGREGATIVE_METHODS
# common aliases
CC = agg.ClassifyAndCount
ACC = agg.AdjustedClassifyAndCount
PCC = agg.ProbabilisticClassifyAndCount
PACC = agg.ProbabilisticAdjustedClassifyAndCount
ELM = agg.ExplicitLossMinimisation
EMQ = agg.ExpectationMaximizationQuantifier
MLPE = nagg.MaximumLikelihoodPrevalenceEstimation
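
A sketch of how these aliases are meant to be used (it assumes LabelledCollection accepts an (instances, labels) pair, as in the constructor call used in aggregative.py, with a scikit-learn classifier as the learner):

import numpy as np
from sklearn.linear_model import LogisticRegression
from quapy import LabelledCollection
from quapy.method import CC, ACC

np.random.seed(0)
X = np.random.rand(1000, 10)
y = (X[:, 0] > 0.5).astype(int)            # synthetic binary labels
data = LabelledCollection(X, y)

cc = CC(LogisticRegression()).fit(data)
acc = ACC(LogisticRegression()).fit(data)  # holds out part of data to estimate the adjustment

X_test = np.random.rand(200, 10)
print(cc.quantify(X_test))    # estimated class prevalences, e.g. roughly [0.5, 0.5]
print(acc.quantify(X_test))   # adjusted estimates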

351
quapy/method/aggregative.py Normal file

@ -0,0 +1,351 @@
import numpy as np
import warnings
from .base import *
from ..error import mae
from .. import functional as F
from ..classification.svmperf import SVMperf
from ..dataset import LabelledCollection
from sklearn.metrics import confusion_matrix
from sklearn.calibration import CalibratedClassifierCV
from joblib import Parallel, delayed
# Abstract classes
# ------------------------------------
class AggregativeQuantifier(BaseQuantifier):
"""
Abstract class for quantification methods that base their estimations on the aggregation of classification
results. Aggregative Quantifiers thus implement a _classify_ method and maintain a _learner_ attribute.
"""
@abstractmethod
def fit(self, data: LabelledCollection, fit_learner=True, *args): ...
def classify(self, documents):
return self.learner.predict(documents)
def get_params(self, deep=True):
return self.learner.get_params()
def set_params(self, **parameters):
self.learner.set_params(**parameters)
@property
def n_classes(self):
return len(self.classes)
@property
def classes(self):
return self.learner.classes_
class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
"""
Abstract class for quantification methods that base their estimations on the aggregation of posterior probabilities
as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend Aggregative
Quantifiers by implementing a _soft_classify_ method returning values in [0,1] -- the posterior
probabilities.
"""
def soft_classify(self, data):
return self.learner.predict_proba(data)
def set_params(self, **parameters):
if isinstance(self.learner, CalibratedClassifierCV):
parameters={'base_estimator__'+k:v for k,v in parameters.items()}
self.learner.set_params(**parameters)
# Helper
# ------------------------------------
def training_helper(learner,
data: LabelledCollection,
fit_learner: bool = True,
ensure_probabilistic=False,
train_val_split=None):
"""
Training procedure common to all Aggregative Quantifiers.
:param learner: the learner to be fit
:param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner.
:param fit_learner: whether or not to fit the learner
:param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the
learner is not probabilistic, then a CalibratedClassifierCV instance of it is trained)
:param train_val_split: if specified, indicates the proportion of training documents on which to fit the learner
:return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split
was specified, or None otherwise)
"""
if fit_learner:
if ensure_probabilistic:
if not hasattr(learner, 'predict_proba'):
print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. '
f'The learner will be calibrated.')
learner = CalibratedClassifierCV(learner, cv=5)
if train_val_split is not None:
if not (0 < train_val_split < 1):
raise ValueError(f'train/val split {train_val_split} out of range, must be in (0,1)')
train, unused = data.split_stratified(train_prop=train_val_split)
else:
train, unused = data, None
learner.fit(train.instances, train.labels)
else:
if ensure_probabilistic:
if not hasattr(learner, 'predict_proba'):
raise AssertionError('error: the learner cannot be calibrated since fit_learner is set to False')
unused = data
return learner, unused
# Methods
# ------------------------------------
class ClassifyAndCount(AggregativeQuantifier):
"""
The most basic quantification method: it simply classifies all instances and counts how many have been
attributed to each of the classes in order to compute class prevalence estimates.
"""
def __init__(self, learner):
self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, *args):
"""
Trains the Classify & Count method unless _fit_learner_ is False, in which case it is assumed to be already fit.
:param data: training data
:param fit_learner: if False, the classifier is assumed to be fit
:param args: unused
:return: self
"""
self.learner, _ = training_helper(self.learner, data, fit_learner)
return self
def quantify(self, documents, *args):
classification = self.classify(documents) # classify
return F.prevalence_from_labels(classification, self.n_classes) # & count
class AdjustedClassifyAndCount(AggregativeQuantifier):
def __init__(self, learner):
self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
self.learner, validation = training_helper(self.learner, data, fit_learner, train_val_split=train_val_split)
self.cc = ClassifyAndCount(self.learner)
y_ = self.cc.classify(validation.instances)
y = validation.labels
# estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
# document that belongs to yj ends up being classified as belonging to yi
self.Pte_cond_estim_ = confusion_matrix(y,y_).T / validation.counts()
return self
def quantify(self, documents, *args):
prevs_estim = self.cc.quantify(documents)
# solve for the linear system Ax = B with A=Pte_cond_estim and B = prevs_estim
A = self.Pte_cond_estim_
B = prevs_estim
try:
adjusted_prevs = np.linalg.solve(A, B)
adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
adjusted_prevs /= adjusted_prevs.sum()
except np.linalg.LinAlgError:
adjusted_prevs = prevs_estim # no way to adjust them!
return adjusted_prevs
def classify(self, data):
return self.cc.classify(data)
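
The adjustment can be checked in isolation; a minimal numpy sketch of the same solve-clip-normalize computation, on hypothetical numbers:

import numpy as np

# estimated P(predicted=i | true=j): rows are predicted classes, columns are true classes
Pte_cond_estim = np.array([[0.9, 0.2],
                           [0.1, 0.8]])
# prevalences as estimated by classify-and-count on the test set
prevs_estim = np.array([0.55, 0.45])

# solve Pte_cond_estim @ true_prevs = prevs_estim for the true prevalences
adjusted = np.linalg.solve(Pte_cond_estim, prevs_estim)
adjusted = np.clip(adjusted, 0, 1)
adjusted /= adjusted.sum()
print(adjusted)  # [0.5, 0.5]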
class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
def __init__(self, learner):
self.learner = learner
def fit(self, data : LabelledCollection, fit_learner=True, *args):
self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
return self
def quantify(self, documents, *args):
posteriors = self.soft_classify(documents) # classify
prevalences = F.prevalence_from_probabilities(posteriors, binarize=False) # & count
return prevalences
class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):
def __init__(self, learner):
self.learner = learner
def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
self.learner, validation = training_helper(
self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split
)
self.pcc = ProbabilisticClassifyAndCount(self.learner)
y_ = self.pcc.classify(validation.instances)
y = validation.labels
# estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
# document that belongs to yj ends up being classified as belonging to yi
self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
return self
def quantify(self, documents, *args):
prevs_estim = self.pcc.quantify(documents)
A = self.Pte_cond_estim_
B = prevs_estim
try:
adjusted_prevs = np.linalg.solve(A, B)
adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
adjusted_prevs /= adjusted_prevs.sum()
except np.linalg.LinAlgError:
adjusted_prevs = prevs_estim # no way to adjust them!
return adjusted_prevs
def classify(self, data):
return self.pcc.classify(data)
class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
MAX_ITER = 1000
EPSILON = 1e-4
def __init__(self, learner, verbose=False):
self.learner = learner
self.verbose = verbose
def fit(self, data: LabelledCollection, fit_learner=True, *args):
self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes)
return self
def quantify(self, X, epsilon=EPSILON):
tr_prev=self.train_prevalence
posteriors = self.soft_classify(X)
return self.EM(tr_prev, posteriors, self.verbose, epsilon)
@classmethod
def EM(cls, tr_prev, posterior_probabilities, verbose=False, epsilon=EPSILON):
Px = posterior_probabilities
Ptr = np.copy(tr_prev)
qs = np.copy(Ptr) # qs (the running estimate) is initialized as the training prevalence
s, converged = 0, False
qs_prev_ = None
while not converged and s < ExpectationMaximizationQuantifier.MAX_ITER:
# E-step: ps is Ps(y|xi), the posteriors re-weighted by the ratio of current to training prevalence
ps_unnormalized = (qs / Ptr) * Px
ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1,1)
# M-step: qs is Ps+1(y), the updated prevalence estimate
qs = ps.mean(axis=0)
if qs_prev_ is not None and mae(qs, qs_prev_) < epsilon and s>10:
converged = True
qs_prev_ = qs
s += 1
if verbose:
print('-'*80)
if not converged:
warnings.warn('the method has reached the maximum number of iterations; it might not have converged')
return qs
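
The E/M updates reduce to a few numpy lines; a simplified, self-contained sketch (it drops the minimum-iteration guard and the verbose flag) on toy posteriors:

import numpy as np

def em_prevalence(posteriors, train_prev, epsilon=1e-4, max_iter=1000):
    # posteriors: (n_instances, n_classes) posterior probabilities from the classifier
    qs = np.copy(train_prev)
    for _ in range(max_iter):
        # E-step: re-weight the posteriors by the ratio of current to training prevalence
        ps = (qs / train_prev) * posteriors
        ps /= ps.sum(axis=1, keepdims=True)
        # M-step: the new prevalence estimate is the mean of the re-weighted posteriors
        qs_new = ps.mean(axis=0)
        if np.abs(qs_new - qs).mean() < epsilon:
            return qs_new
        qs = qs_new
    return qs

posteriors = np.array([[0.9, 0.1], [0.7, 0.3], [0.8, 0.2], [0.6, 0.4]])
print(em_prevalence(posteriors, train_prev=np.array([0.5, 0.5])))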
# todo: from here
def train_task(c, learners, data):
learners[c].fit(data.instances, data.labels == c)
def binary_quant_task(c, learners, X):
predictions_ci = learners[c].predict(X)
return predictions_ci.mean() # since the predictions array is binary
class OneVsAllELM(AggregativeQuantifier):
def __init__(self, svmperf_base, loss, n_jobs=-1, **kwargs):
self.svmperf_base = svmperf_base
self.loss = loss
self.n_jobs = n_jobs
self.kwargs = kwargs
def fit(self, data: LabelledCollection, fit_learner=True, *args):
assert fit_learner, 'the method requires that fit_learner=True'
self.learners = {c: SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) for c in data.classes_}
Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(train_task)(c, self.learners, data) for c in self.learners.keys()
)
return self
def quantify(self, X, y=None):
prevalences = np.asarray(
Parallel(n_jobs=self.n_jobs, backend='threading')(
delayed(binary_quant_task)(c, self.learners, X) for c in self.learners.keys()
)
)
prevalences /= prevalences.sum()
return prevalences
@property
def classes(self):
return sorted(self.learners.keys())
def preclassify_collection(self, data: LabelledCollection):
classifications = []
for class_ in data.classes_:
classifications.append(self.learners[class_].predict(data.instances))
classifications = np.vstack(classifications).T
precomputed = LabelledCollection(classifications, data.labels)
return precomputed
def set_params(self, **parameters):
self.kwargs=parameters
def get_params(self, deep=True):
return self.kwargs
class ExplicitLossMinimisation(AggregativeQuantifier):
def __init__(self, svmperf_base, loss, **kwargs):
self.learner = SVMperf(svmperf_base, loss=loss, **kwargs)
def fit(self, data: LabelledCollection, fit_learner=True, *args):
assert fit_learner, 'the method requires that fit_learner=True'
self.learner.fit(data.instances, data.labels)
return self
def quantify(self, X, y=None):
predictions = self.learner.predict(X)
return F.prevalence_from_labels(predictions, self.learner.n_classes_)
def classify(self, X, y=None):
return self.learner.predict(X)
class SVMQ(ExplicitLossMinimisation):
def __init__(self, svmperf_base, **kwargs):
super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
class SVMKLD(ExplicitLossMinimisation):
def __init__(self, svmperf_base, **kwargs):
super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
class SVMNKLD(ExplicitLossMinimisation):
def __init__(self, svmperf_base, **kwargs):
super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
class SVMAE(ExplicitLossMinimisation):
def __init__(self, svmperf_base, **kwargs):
super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
class SVMRAE(ExplicitLossMinimisation):
def __init__(self, svmperf_base, **kwargs):
super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)

21
quapy/method/base.py Normal file

@ -0,0 +1,21 @@
from abc import ABCMeta, abstractmethod
import quapy as qp
# Base Quantifier abstract class
# ------------------------------------
class BaseQuantifier(metaclass=ABCMeta):
@abstractmethod
def fit(self, data: qp.LabelledCollection, *args): ...
@abstractmethod
def quantify(self, documents, *args): ...
@abstractmethod
def set_params(self, **parameters): ...
@abstractmethod
def get_params(self, deep=True): ...

21
quapy/method/non_aggregative.py Normal file

@ -0,0 +1,21 @@
from quapy import LabelledCollection
from .base import BaseQuantifier
class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
def __init__(self, **kwargs):
pass
def fit(self, data: LabelledCollection, *args):
self.estimated_prevalence = data.prevalence()
return self
def quantify(self, documents, *args):
return self.estimated_prevalence
def get_params(self):
pass
def set_params(self, **parameters):
pass

540
svm-perf-quantification-ext.patch Normal file

@ -0,0 +1,540 @@
diff -ruN svm_perf/svm_struct/svm_struct_main.c svm_perf_quantification/svm_struct/svm_struct_main.c
--- svm_perf/svm_struct/svm_struct_main.c 2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct/svm_struct_main.c 2020-10-28 12:23:53.000000000 +0100
@@ -128,7 +128,8 @@
struct_parm->newconstretrain=100;
struct_parm->ccache_size=5;
struct_parm->batch_size=100;
-
+ struct_parm->loss_parm=1.0;
+ struct_parm->beta=1.0; // AIC-QBETA
strcpy (modelfile, "svm_struct_model");
strcpy (learn_parm->predfile, "trans_predictions");
strcpy (learn_parm->alphafile, "");
@@ -170,6 +171,7 @@
case 'p': i++; struct_parm->slack_norm=atol(argv[i]); break;
case 'e': i++; struct_parm->epsilon=atof(argv[i]); break;
case 'k': i++; struct_parm->newconstretrain=atol(argv[i]); break;
+ case 'j': i++; struct_parm->beta=atof(argv[i]); break; // AIC-QBETA
case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
@@ -189,6 +191,7 @@
case '-': strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break;
case 'v': i++; (*struct_verbosity)=atol(argv[i]); break;
case 'y': i++; (*verbosity)=atol(argv[i]); break;
+ case '!': i++; struct_parm->loss_parm=atof(argv[i]); break;
default: printf("\nUnrecognized option %s!\n\n",argv[i]);
print_help();
exit(0);
@@ -396,6 +399,9 @@
printf(" (in the same order as in the training set)\n");
printf("Application-Specific Options:\n");
print_struct_help();
+ printf("*************************************************\n"); // AIC-QBETA
+ printf(" -j float -> parameter beta for qbeta-based loss functions (default: 1.0)\n");
+ printf("*************************************************\n");
wait_any_key();
printf("\nMore details in:\n");
diff -ruN svm_perf/svm_struct_api.c svm_perf_quantification/svm_struct_api.c
--- svm_perf/svm_struct_api.c 2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct_api.c 2020-10-28 12:23:53.000000000 +0100
@@ -20,6 +20,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <math.h>
#include "svm_struct_api.h"
#include "svm_light/svm_common.h"
#include "svm_struct/svm_struct_common.h"
@@ -27,7 +28,9 @@
#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define MIN(x,y) ((x) > (y) ? (y) : (x))
+#define ABS(x) ((x) < (0) ? (-(x)) : (x))
#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+#define PI (3.141592653589793)
int compareup(const void *a, const void *b)
{
@@ -72,6 +75,16 @@
double rocarea(LABEL y, LABEL ybar);
double prbep(LABEL y, LABEL ybar);
double avgprec(LABEL y, LABEL ybar);
+/* AIC-QBETA */
+double gm(int a, int b, int c, int d);
+double nae(int a, int b, int c, int d);
+double ae(int a, int b, int c, int d);
+double rae(int a, int b, int c, int d);
+double Qbeta(int a, int b, int c, int d, double beta);
+double Qbeta_acc(int a, int b, int c, int d, double beta);
+double Qbeta_f1(int a, int b, int c, int d, double beta);
+double Qbeta_gm(int a, int b, int c, int d, double beta);
+/* AIC-QBETA */
double zeroone_loss(int a, int b, int c, int d);
double fone_loss(int a, int b, int c, int d);
@@ -82,6 +95,23 @@
double swappedpairs_loss(LABEL y, LABEL ybar);
double avgprec_loss(LABEL y, LABEL ybar);
+double kldiv(int a, int b, int c, int d); // KLD
+double kldiv_loss(int a, int b, int c, int d); // KLD
+double nkldiv_loss(int a, int b, int c, int d); // KLD
+
+double milli_loss(int a, int b, int c, int d); //MILL
+
+/* AIC-QBETA */
+double gm_loss(int a, int b, int c, int d);
+double nae_loss(int a, int b, int c, int d);
+double ae_loss(int a, int b, int c, int d);
+double rae_loss(int a, int b, int c, int d);
+double Qbeta_loss(int a, int b, int c, int d, double beta);
+double Qbeta_acc_loss(int a, int b, int c, int d, double beta);
+double Qbeta_f1_loss(int a, int b, int c, int d, double beta);
+double Qbeta_gm_loss(int a, int b, int c, int d, double beta);
+/* AIC-QBETA */
+
void svm_struct_learn_api_init(int argc, char* argv[])
{
/* Called in learning part before anything else is done to allow
@@ -181,10 +211,22 @@
/* change label value for better scaling using thresholdmetrics */
if((sparm->loss_function == ZEROONE)
|| (sparm->loss_function == FONE)
+ || (sparm->loss_function == GM) // AIC-QBETA
+ || (sparm->loss_function == NAE) // AIC-QBETA
+ || (sparm->loss_function == AE) // AIC-QBETA
+ || (sparm->loss_function == RAE) // AIC-QBETA
+ || (sparm->loss_function == QBETA) // AIC-QBETA
+ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+ || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+ || (sparm->loss_function == QBETA_GM) // AIC-QBETA
|| (sparm->loss_function == ERRORRATE)
|| (sparm->loss_function == PRBEP)
|| (sparm->loss_function == PREC_K)
- || (sparm->loss_function == REC_K)) {
+ || (sparm->loss_function == REC_K)
+ || (sparm->loss_function == KLD)
+ || (sparm->loss_function == NKLD)
+ || (sparm->loss_function == MILLI)
+ ) {
for(i=0;i<sample.examples[0].x.totdoc;i++) {
if(sample.examples[0].y.class[i]>0)
sample.examples[0].y.class[i]=0.5*100.0/(numn+nump);
@@ -520,10 +562,22 @@
LABEL ybar;
if((sparm->loss_function == ZEROONE)
|| (sparm->loss_function == FONE)
+ || (sparm->loss_function == GM) // AIC-QBETA
+ || (sparm->loss_function == NAE) // AIC-QBETA
+ || (sparm->loss_function == AE) // AIC-QBETA
+ || (sparm->loss_function == RAE) // AIC-QBETA
+ || (sparm->loss_function == QBETA) // AIC-QBETA
+ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+ || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+ || (sparm->loss_function == QBETA_GM) // AIC-QBETA
|| (sparm->loss_function == ERRORRATE)
|| (sparm->loss_function == PRBEP)
|| (sparm->loss_function == PREC_K)
- || (sparm->loss_function == REC_K)) {
+ || (sparm->loss_function == REC_K)
+ || (sparm->loss_function == KLD)
+ || (sparm->loss_function == NKLD)
+ || (sparm->loss_function == MILLI)
+ ) {
ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm,
sparm->loss_type);
}
@@ -562,9 +616,21 @@
sparm->loss_type); */
else if((sparm->loss_function == ZEROONE)
|| (sparm->loss_function == FONE)
+ || (sparm->loss_function == GM) // AIC-QBETA
+ || (sparm->loss_function == NAE) // AIC-QBETA
+ || (sparm->loss_function == AE) // AIC-QBETA
+ || (sparm->loss_function == RAE) // AIC-QBETA
+ || (sparm->loss_function == QBETA) // AIC-QBETA
+ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA
+ || (sparm->loss_function == QBETA_F1) // AIC-QBETA
+ || (sparm->loss_function == QBETA_GM) // AIC-QBETA
|| (sparm->loss_function == PRBEP)
|| (sparm->loss_function == PREC_K)
- || (sparm->loss_function == REC_K))
+ || (sparm->loss_function == REC_K)
+ || (sparm->loss_function == KLD)
+ || (sparm->loss_function == NKLD)
+ || (sparm->loss_function == MILLI)
+ )
ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm,
sparm->loss_type);
else if((sparm->loss_function == SWAPPEDPAIRS))
@@ -741,7 +807,23 @@
if(sparm->loss_function == ZEROONE)
loss=zeroone_loss(a,numn-d,nump-a,d);
else if(sparm->loss_function == FONE)
- loss=fone_loss(a,numn-d,nump-a,d);
+ loss=fone_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == GM) // AIC-QBETA
+ loss=gm_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == NAE) // AIC-QBETA
+ loss=nae_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == AE) // AIC-QBETA
+ loss=ae_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == RAE) // AIC-QBETA
+ loss=rae_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == QBETA) // AIC-QBETA
+ loss=Qbeta_loss(a,numn-d,nump-a,d,sparm->beta);
+ else if(sparm->loss_function == QBETA_ACC) // AIC-QBETA
+ loss=Qbeta_acc_loss(a,numn-d,nump-a,d,sparm->beta);
+ else if(sparm->loss_function == QBETA_F1) // AIC-QBETA
+ loss=Qbeta_f1_loss(a,numn-d,nump-a,d,sparm->beta);
+ else if(sparm->loss_function == QBETA_GM) // AIC-QBETA
+ loss=Qbeta_gm_loss(a,numn-d,nump-a,d,sparm->beta);
else if(sparm->loss_function == ERRORRATE)
loss=errorrate_loss(a,numn-d,nump-a,d);
else if((sparm->loss_function == PRBEP) && (a+numn-d == nump))
@@ -750,6 +832,12 @@
loss=prec_k_loss(a,numn-d,nump-a,d);
else if((sparm->loss_function == REC_K) && (a+numn-d <= prec_rec_k))
loss=rec_k_loss(a,numn-d,nump-a,d);
+ else if(sparm->loss_function == KLD) //KLD
+ loss=kldiv_loss(a,numn-d,nump-a,d); //KLD
+ else if(sparm->loss_function == NKLD) //KLD
+ loss=nkldiv_loss(a,numn-d,nump-a,d); //KLD
+ else if(sparm->loss_function == MILLI) //MILLI
+ loss=milli_loss(a,numn-d,nump-a,d); //MILLI
else {
loss=0;
}
@@ -1213,6 +1301,7 @@
}
/* printf("%f %f\n",y.class[i],ybar.class[i]); */
}
+ //printf("********** loss %d (a,b,c,d) (%d,%d,%d,%d) beta=%f\n",sparm->loss_function,a,b,c,d,sparm->beta);
/* Return the loss according to the selected loss function. */
if(sparm->loss_function == ZEROONE) { /* type 0 loss: 0/1 loss */
/* return 0, if y==ybar. return 1 else */
@@ -1221,6 +1310,30 @@
else if(sparm->loss_function == FONE) {
loss=fone_loss(a,b,c,d);
}
+ else if(sparm->loss_function == GM) { // AIC-QBETA
+ loss=gm_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == NAE) { // AIC-QBETA
+ loss=nae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == AE) { // AIC-QBETA
+ loss=ae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == RAE) { // AIC-QBETA
+ loss=rae_loss(a,b,c,d);
+ }
+ else if(sparm->loss_function == QBETA) { // AIC-QBETA
+ loss=Qbeta_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_ACC) { // AIC-QBETA
+ loss=Qbeta_acc_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_F1) { // AIC-QBETA
+ loss=Qbeta_f1_loss(a,b,c,d,sparm->beta);
+ }
+ else if(sparm->loss_function == QBETA_GM) { // AIC-QBETA
+ loss=Qbeta_gm_loss(a,b,c,d,sparm->beta);
+ }
else if(sparm->loss_function == ERRORRATE) {
loss=errorrate_loss(a,b,c,d);
}
@@ -1242,6 +1355,15 @@
else if(sparm->loss_function == AVGPREC) {
loss=avgprec_loss(y,ybar);
}
+ else if(sparm->loss_function == KLD) { //KLD
+ loss=kldiv_loss(a,b,c,d); //KLD
+ } //KLD
+ else if(sparm->loss_function == NKLD) { //KLD
+ loss=nkldiv_loss(a,b,c,d); //KLD
+ } //KLD
+ else if(sparm->loss_function == MILLI) { //MILLI
+ loss=milli_loss(a,b,c,d); //MILLI
+ } //MILLI
else {
/* Put your code for different loss functions here. But then
find_most_violated_constraint_???(x, y, sm) has to return the
@@ -1320,6 +1442,16 @@
printf("PRBEP : %5.2f\n",teststats->prbep);
printf("ROCArea : %5.2f\n",teststats->rocarea);
printf("AvgPrec : %5.2f\n",teststats->avgprec);
+ printf("Qb : %5.2f\n",teststats->Qbeta);
+ printf("Qb (Acc) : %5.2f\n",teststats->Qbeta_acc);
+ printf("Qb (F1) : %5.2f\n",teststats->Qbeta_f1);
+ printf("Qb (GM) : %5.2f\n",teststats->Qbeta_gm);
+ printf("NAE : %5.2f\n",teststats->nae);
+ printf("AE : %5.2f\n",teststats->ae);
+ printf("RAE : %5.2f\n",teststats->rae);
+ printf("GM : %5.2f\n",teststats->gm);
+ printf("KLD : %5.2f\n",teststats->kld);
+ printf("NKLD : %5.2f\n",teststats->nkld);
}
else {
printf("NOTE: %ld test examples are unlabeled, so performance cannot be computed. The\n",teststats->test_data_unlabeled);
@@ -1352,6 +1484,29 @@
teststats->recall=100.0-loss(ex.y,ypred,sparm);
sparm->loss_function=FONE;
teststats->fone=100.0-loss(ex.y,ypred,sparm);
+
+ sparm->loss_function=GM; // AIC-QBETA
+ teststats->gm=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=NAE; // AIC-QBETA
+ teststats->nae=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=AE; // AIC-QBETA
+ teststats->ae=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=RAE; // AIC-QBETA
+ teststats->rae=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=QBETA; // AIC-QBETA
+ teststats->Qbeta=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=QBETA_ACC; // AIC-QBETA
+ teststats->Qbeta_acc=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=QBETA_F1; // AIC-QBETA
+ teststats->Qbeta_f1=100.0-loss(ex.y,ypred,sparm);
+ sparm->loss_function=QBETA_GM; // AIC-QBETA
+ teststats->Qbeta_gm=100.0-loss(ex.y,ypred,sparm);
+
+ sparm->loss_function=KLD; // KLD
+ teststats->kld=100-loss(ex.y,ypred,sparm);
+ sparm->loss_function=NKLD; // KLD
+ teststats->nkld=100.0-loss(ex.y,ypred,sparm);
+
teststats->prbep=prbep(ex.y,ypred);
teststats->rocarea=rocarea(ex.y,ypred);
teststats->avgprec=avgprec(ex.y,ypred);
@@ -1403,6 +1558,7 @@
STRUCTMODEL sm;
sm.svm_model=read_model(file);
+ sparm->beta = 1; // AIC-QBETA *****************************
sparm->loss_function=ERRORRATE;
sparm->bias=0;
sparm->bias_featurenum=0;
@@ -1514,6 +1670,18 @@
printf(" %2d Prec@k: 100 minus precision at k in percent.\n",PREC_K);
printf(" %2d Rec@k: 100 minus recall at k in percent.\n",REC_K);
printf(" %2d ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).\n\n",SWAPPEDPAIRS);
+ printf(" %2d Kullback-Leibler divergence.\n",KLD); //KLD
+ printf(" %2d Normalized Kullback-Leibler divergence.\n",NKLD); //KLD
+ printf(" %2d MILLI.\n",MILLI); //MILLI
+ printf(" %2d GM: geometric mean of tpr and tnr\n",GM); // AIC-QBETA
+ printf(" %2d NAE: normalized absolute error (Esuli & Sebastiani)\n",NAE); // AIC-QBETA
+ printf(" %2d AE: absolute error (Esuli & Sebastiani)\n",AE); // AIC-QBETA
+ printf(" %2d RAE: relative absolute error (Esuli & Sebastiani)\n",RAE); // AIC-QBETA
+ printf(" %2d Qbeta: 100 minus the Qbeta-score in percent (with recall)\n",QBETA); // AIC-QBETA
+ printf(" %2d Qbeta_acc: 100 minus the Qbeta-score in percent (with acc)\n",QBETA_ACC); // AIC-QBETA
+ printf(" %2d Qbeta_f1: 100 minus the Qbeta-score in percent (with F1)\n",QBETA_F1); // AIC-QBETA
+ printf(" %2d Qbeta_gm: 100 minus the Qbeta-score in percent (with GM)\n",QBETA_GM); // AIC-QBETA
+
printf("NOTE: The '-c' parameters in SVM-light and SVM-perf are related as\n");
printf(" c_light = c_perf*100/n for the 'Errorrate' loss function, where n is the\n");
printf(" number of training examples.\n\n");
@@ -1785,7 +1953,65 @@
free(predset);
return(100.0*(apr/(double)(nump)));
}
-
+/* AIC-QBETA */
+double tnr(int a, int b, int c, int d)
+{
+ /* Returns tnr as fractional value. */
+ if((b+d) == 0) return(0.0);
+ return((double)d/(double)(b+d));
+}
+double gm(int a, int b, int c, int d)
+{
+ double tprate = rec(a,b,c,d);
+ double tnrate = tnr(a,b,c,d);
+ return sqrt( tprate * tnrate );
+}
+double nae(int a, int b, int c, int d)
+{
+ double maximo = (a+c > b+d? a+c: b+d);
+ return 1.0 - ( (double)abs(c-b) / maximo);
+ //return 1.0 - ( (double)abs(c-b) / (double)(a+b+c+d)); // ABSERR
+}
+double ae(int a, int b, int c, int d)
+{
+ return (double)abs(c-b) / (double)(a+b+c+d) ; // ABSERR
+}
+double rae(int a, int b, int c, int d)
+{
+ double absdif = (double)abs(c-b) ;
+ double smooth_rae_pos = absdif / ((double)(a+c+0.5)) ;
+ double smooth_rae_neg = absdif / ((double)(b+d+0.5)) ;
+ return 0.5*(smooth_rae_pos + smooth_rae_neg) ;
+}
+double Qbeta(int a, int b, int c, int d, double beta)
+{
+ if(a+c == 0) return(0.0);
+ double qperf=nae(a,b,c,d);
+ double cperf=rec(a,b,c,d);
+ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_acc(int a, int b, int c, int d, double beta)
+{
+ if(a+c == 0) return(0.0);
+ double qperf=nae(a,b,c,d);
+ double cperf=1.0-errorrate(a,b,c,d);
+ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_f1(int a, int b, int c, int d, double beta)
+{
+ if(a+c == 0) return(0.0);
+ double qperf=nae(a,b,c,d);
+ double cperf=fone(a,b,c,d);
+ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+double Qbeta_gm(int a, int b, int c, int d, double beta)
+{
+ if(a+c == 0) return(0.0);
+ double qperf=nae(a,b,c,d);
+ double cperf=gm(a,b,c,d);
+ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf));
+}
+/* AIC-QBETA */
/*------- Loss functions based on performance measures --------*/
double zeroone_loss(int a, int b, int c, int d)
@@ -1846,4 +2072,70 @@
}
+//KLD
+double kldiv(int a, int b, int c, int d)
+{
+ double sum = a+b+c+d+1.0;
+ double pab = (a+b+0.5)/sum;
+ double pac = (a+c+0.5)/sum;
+ double pbd = (b+d+0.5)/sum;
+ double pcd = (c+d+0.5)/sum;
+
+ double kl = pac*log(pac/pab)+pbd*log(pbd/pcd);
+
+ return kl;
+}
+
+//KLD
+double kldiv_loss(int a, int b, int c, int d)
+{
+ return kldiv(a,b,c,d);
+}
+
+//KLD
+double nkldiv_loss(int a, int b, int c, int d)
+{
+ return 100.0-(100.0*2.0/(1.0+exp(kldiv(a,b,c,d))));
+}
+
+//MILLI
+double milli_loss(int a, int b, int c, int d)
+{
+ int sum = a+b+c+d;
+ return 100.0*(b+c)*ABS(b-c);
+}
+/* AIC-QBETA */
+double gm_loss(int a, int b, int c, int d)
+{
+ return 100.0 * (1.0-gm(a,b,c,d));
+}
+double nae_loss(int a, int b, int c, int d)
+{
+ return 100.0 * (1.0-nae(a,b,c,d));
+}
+double ae_loss(int a, int b, int c, int d)
+{
+ return 100.0 * ae(a,b,c,d);
+}
+double rae_loss(int a, int b, int c, int d)
+{
+ return 100.0 * rae(a,b,c,d);
+}
+double Qbeta_loss(int a, int b, int c, int d,double beta)
+{
+ return(100.0*(1.0-Qbeta(a,b,c,d,beta)));
+}
+double Qbeta_acc_loss(int a, int b, int c, int d,double beta)
+{
+ return(100.0*(1.0-Qbeta_acc(a,b,c,d,beta)));
+}
+double Qbeta_f1_loss(int a, int b, int c, int d,double beta)
+{
+ return(100.0*(1.0-Qbeta_f1(a,b,c,d,beta)));
+}
+double Qbeta_gm_loss(int a, int b, int c, int d,double beta)
+{
+ return(100.0*(1.0-Qbeta_gm(a,b,c,d,beta)));
+}
+/* AIC-QBETA */
diff -ruN svm_perf/svm_struct_api_types.h svm_perf_quantification/svm_struct_api_types.h
--- svm_perf/svm_struct_api_types.h 2020-10-28 12:23:19.000000000 +0100
+++ svm_perf_quantification/svm_struct_api_types.h 2020-10-28 12:23:53.000000000 +0100
@@ -28,14 +28,25 @@
# define INST_VERSION_DATE "15.07.2009"
/* Identifiers for loss functions */
-#define ZEROONE 0
-#define FONE 1
-#define ERRORRATE 2
-#define PRBEP 3
-#define PREC_K 4
-#define REC_K 5
-#define SWAPPEDPAIRS 10
-#define AVGPREC 11
+#define ZEROONE 0
+#define FONE 1
+#define ERRORRATE 2
+#define PRBEP 3
+#define PREC_K 4
+#define REC_K 5
+#define SWAPPEDPAIRS 10
+#define AVGPREC 11
+#define KLD 12 //KLD
+#define NKLD 13 //KLD
+#define MILLI 16 //MILLI
+#define GM 20 // AIC-QBETA
+#define NAE 21 // AIC-QBETA
+#define QBETA 22 // AIC-QBETA
+#define QBETA_ACC 23 // AIC-QBETA
+#define QBETA_F1 24 // AIC-QBETA
+#define QBETA_GM 25 // AIC-QBETA
+#define AE 26 // AIC-QBETA
+#define RAE 27 // AIC-QBETA
/* default precision for solving the optimization problem */
# define DEFAULT_EPS 0.1
@@ -169,6 +180,8 @@
svm_perf_classify. This uses more
memory, but is faster if the support
vectors in the model are dense. */
+ double loss_parm;
+ double beta; /* AIC-QBETA */
} STRUCT_LEARN_PARM;
typedef struct struct_test_stats {
@@ -183,6 +196,16 @@
double prbep;
double rocarea;
double avgprec;
+ double kld; //KLD
+ double nkld; //KLD
+ double gm; // AIC-QBETA
+ double nae; // AIC-QBETA
+ double ae; // AIC-QBETA
+ double rae; // AIC-QBETA
+ double Qbeta; // AIC-QBETA
+ double Qbeta_acc; // AIC-QBETA
+ double Qbeta_f1; // AIC-QBETA
+ double Qbeta_gm; // AIC-QBETA
} STRUCT_TEST_STATS;
typedef struct struct_id_score {
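
For reference, the smoothed KLD and the Q_beta measure that the patch adds can be transcribed compactly; a Python sketch of the kldiv, nae and Qbeta formulas above, with a=tp, b=fp, c=fn, d=tn and svm_perf's rec (recall) helper inlined:

from math import log

def kldiv(a, b, c, d):
    # smoothed KL divergence between the true prevalence (a+c, b+d)
    # and the predicted prevalence (a+b, c+d)
    total = a + b + c + d + 1.0
    true_pos, true_neg = (a + c + 0.5) / total, (b + d + 0.5) / total
    pred_pos, pred_neg = (a + b + 0.5) / total, (c + d + 0.5) / total
    return true_pos * log(true_pos / pred_pos) + true_neg * log(true_neg / pred_neg)

def nae(a, b, c, d):
    # 1 - normalized absolute error between predicted and true positive counts
    return 1.0 - abs(c - b) / max(a + c, b + d)

def recall(a, b, c, d):
    return a / (a + c) if (a + c) else 0.0

def q_beta(a, b, c, d, beta=1.0):
    # F_beta-style combination of quantification quality (nae) and classification quality (recall)
    if a + c == 0:
        return 0.0
    q, r = nae(a, b, c, d), recall(a, b, c, d)
    return (1 + beta ** 2) * q * r / (beta ** 2 * r + q)

print(kldiv(a=40, b=10, c=20, d=30))   # ~0.02: mild mismatch between true and predicted positives
print(q_beta(a=40, b=10, c=20, d=30))  # ~0.74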