From a882424eeb36b1295286123d8e9ba07c376500d8 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Thu, 3 Dec 2020 18:12:28 +0100
Subject: [PATCH] many aggregative methods added

---
 TODO.txt                          |   3 +-
 prepare_svmperf.sh                |  28 ++
 quapy/__init__.py                 |   6 +
 quapy/error.py                    |  47 +++
 quapy/functional.py               |  49 +++
 quapy/method/__init__.py          |  30 ++
 quapy/method/aggregative.py       | 351 +++++++++++++++++++
 quapy/method/base.py              |  21 ++
 quapy/method/non_aggregative.py   |  21 ++
 svm-perf-quantification-ext.patch | 540 ++++++++++++++++++++++++++++++
 10 files changed, 1095 insertions(+), 1 deletion(-)
 create mode 100755 prepare_svmperf.sh
 create mode 100644 quapy/__init__.py
 create mode 100644 quapy/error.py
 create mode 100644 quapy/functional.py
 create mode 100644 quapy/method/__init__.py
 create mode 100644 quapy/method/aggregative.py
 create mode 100644 quapy/method/base.py
 create mode 100644 quapy/method/non_aggregative.py
 create mode 100644 svm-perf-quantification-ext.patch

diff --git a/TODO.txt b/TODO.txt
index d632436..d25ed25 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,2 +1,3 @@
 Documentation with sphinx
-The parallel training in svmperf seems not to work
\ No newline at end of file
+The parallel training in svmperf seems not to work
+Add "prepare svmperf for quantification" script
\ No newline at end of file
diff --git a/prepare_svmperf.sh b/prepare_svmperf.sh
new file mode 100755
index 0000000..aea1c08
--- /dev/null
+++ b/prepare_svmperf.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+set -x
+
+URL=http://download.joachims.org/svm_perf/current/svm_perf.tar.gz
+FILE=./svm_perf.tar.gz
+wget $URL -O $FILE
+mkdir ./svm_perf
+tar xvzf $FILE -C ./svm_perf
+rm $FILE
+
+# to create the patch [maintainer note]
+#diff -Naur svm_perf svm_perf_quantification > svm-perf-quantification-ext.patch
+
+# to create the modified version
+#cp svm_perf svm_perf_quantification -r [THIS IS NOT NEEDED]
+patch -s -p0 < svm-perf-quantification-ext.patch
+mv svm_perf svm_perf_quantification
+cd svm_perf_quantification
+make
+
+
+
+
+
+
+
+
+
diff --git a/quapy/__init__.py b/quapy/__init__.py
new file mode 100644
index 0000000..59e21fe
--- /dev/null
+++ b/quapy/__init__.py
@@ -0,0 +1,6 @@
+from .dataset import *
+from . import functional
+from . import method
+from . import error
+
+
diff --git a/quapy/error.py b/quapy/error.py
new file mode 100644
index 0000000..ff9a6e0
--- /dev/null
+++ b/quapy/error.py
@@ -0,0 +1,47 @@
+from sklearn.metrics import f1_score
+from settings import SAMPLE_SIZE
+
+
+def f1e(y_true, y_pred):
+    return 1. - f1_score(y_true, y_pred, average='macro')
+
+
+def acce(y_true, y_pred):
+    acc = (y_true == y_pred).mean()
+    return 1. - acc
+
+
+def mae(prevs, prevs_hat):
+    return ae(prevs, prevs_hat).mean()
+
+
+def ae(p, p_hat):
+    assert p.shape == p_hat.shape, 'wrong shape'
+    return abs(p_hat - p).mean(axis=-1)
+
+
+def mrae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
+    return rae(p, p_hat, eps).mean()
+
+
+def rae(p, p_hat, eps=1./(2. * SAMPLE_SIZE)):
+    p = smooth(p, eps)
+    p_hat = smooth(p_hat, eps)
+    return (abs(p - p_hat) / p).mean(axis=-1)
+
+
+def smooth(p, eps):
+    n_classes = p.shape[-1]
+    return (p + eps) / (eps * n_classes + 1)
+
+
+CLASSIFICATION_ERROR = {f1e, acce}
+QUANTIFICATION_ERROR = {mae, mrae}
+
+f1_error = f1e
+acc_error = acce
+mean_absolute_error = mae
+absolute_error = ae
+mean_relative_absolute_error = mrae
+relative_absolute_error = rae
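+
+# Usage sketch (assumes settings.SAMPLE_SIZE = 100; values are illustrative):
+#
+#     import numpy as np
+#     p = np.asarray([1.0, 0.0])       # true prevalences of a binary sample
+#     p_hat = np.asarray([0.9, 0.1])   # estimated prevalences
+#     smooth(p, 1. / (2. * 100))       # -> approx. [0.9950, 0.0050]: no zeros left
+#     rae(p, p_hat)                    # relative absolute error on smoothed values
+#     mrae(p, p_hat)                   # its mean over samples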
diff --git a/quapy/functional.py b/quapy/functional.py
new file mode 100644
index 0000000..f44a85b
--- /dev/null
+++ b/quapy/functional.py
@@ -0,0 +1,49 @@
+from collections import defaultdict
+import numpy as np
+import itertools
+
+
+def artificial_prevalence_sampling(dimensions, n_prevalences=21, repeat=1, return_constrained_dim=False):
+    s = np.linspace(0., 1., n_prevalences, endpoint=True)
+    s = [s] * (dimensions - 1)
+    prevs = [p for p in itertools.product(*s, repeat=1) if sum(p) <= 1]
+    if return_constrained_dim:
+        prevs = [p + (1 - sum(p),) for p in prevs]
+    prevs = np.asarray(prevs).reshape(len(prevs), -1)
+    if repeat > 1:
+        prevs = np.repeat(prevs, repeat, axis=0)
+    return prevs
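+
+# Illustration (hand-computed, not executed anywhere in this module): for
+# dimensions=3 and n_prevalences=3, the grid over the first two classes is
+# s = [0., .5, 1.], and the rows returned are the combinations summing to <= 1:
+#     [0. , 0. ], [0. , 0.5], [0. , 1. ], [0.5, 0. ], [0.5, 0.5], [1. , 0. ]
+# With return_constrained_dim=True, the remaining mass is appended to each row,
+# e.g. [0., 0.5] -> [0., 0.5, 0.5].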
+
+
+def prevalence_from_labels(labels, n_classes):
+    unique, counts = np.unique(labels, return_counts=True)
+    by_class = defaultdict(lambda: 0, dict(zip(unique, counts)))
+    prevalences = np.asarray([by_class[ci] for ci in range(n_classes)], dtype=float)
+    prevalences /= prevalences.sum()
+    return prevalences
+
+
+def prevalence_from_probabilities(posteriors, binarize: bool = False):
+    if binarize:
+        predictions = np.argmax(posteriors, axis=-1)
+        return prevalence_from_labels(predictions, n_classes=posteriors.shape[1])
+    else:
+        prevalences = posteriors.mean(axis=0)
+        prevalences /= prevalences.sum()
+        return prevalences
+
+
+def strprev(prevalences, prec=3):
+    return '[' + ', '.join([f'{p:.{prec}f}' for p in prevalences]) + ']'
+
+
+def adjusted_quantification(prevalence_estim, tpr, fpr, clip=True):
+    den = tpr - fpr
+    if den == 0:
+        den += 1e-8
+    adjusted = (prevalence_estim - fpr) / den
+    if clip:
+        adjusted = np.clip(adjusted, 0., 1.)
+    return adjusted
+
+
diff --git a/quapy/method/__init__.py b/quapy/method/__init__.py
new file mode 100644
index 0000000..a8e98d0
--- /dev/null
+++ b/quapy/method/__init__.py
@@ -0,0 +1,30 @@
+from . import aggregative as agg
+from . import non_aggregative as nagg
+
+
+AGGREGATIVE_METHODS = {
+    agg.ClassifyAndCount,
+    agg.AdjustedClassifyAndCount,
+    agg.ProbabilisticClassifyAndCount,
+    agg.ProbabilisticAdjustedClassifyAndCount,
+    agg.ExplicitLossMinimisation,
+    agg.ExpectationMaximizationQuantifier,
+}
+
+NON_AGGREGATIVE_METHODS = {
+    nagg.MaximumLikelihoodPrevalenceEstimation
+}
+
+QUANTIFICATION_METHODS = AGGREGATIVE_METHODS | NON_AGGREGATIVE_METHODS
+
+
+# common aliases
+CC = agg.ClassifyAndCount
+ACC = agg.AdjustedClassifyAndCount
+PCC = agg.ProbabilisticClassifyAndCount
+PACC = agg.ProbabilisticAdjustedClassifyAndCount
+ELM = agg.ExplicitLossMinimisation
+EMQ = agg.ExpectationMaximizationQuantifier
+MLPE = nagg.MaximumLikelihoodPrevalenceEstimation
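+
+
+# Usage sketch (assumes a scikit-learn classifier and LabelledCollection
+# objects named `train` and `test`; all names below are illustrative):
+#
+#     from sklearn.linear_model import LogisticRegression
+#     import quapy as qp
+#
+#     model = qp.method.ACC(LogisticRegression())   # or CC, PCC, PACC, EMQ
+#     model.fit(train)
+#     estim_prevalences = model.quantify(test.instances)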
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
new file mode 100644
index 0000000..ee16baf
--- /dev/null
+++ b/quapy/method/aggregative.py
@@ -0,0 +1,351 @@
+import warnings
+import numpy as np
+from .base import *
+from ..error import mae
+from .. import functional as F
+from ..classification.svmperf import SVMperf
+from ..dataset import LabelledCollection
+from sklearn.metrics import confusion_matrix
+from sklearn.calibration import CalibratedClassifierCV
+from joblib import Parallel, delayed
+
+
+# Abstract classes
+# ------------------------------------
+
+class AggregativeQuantifier(BaseQuantifier):
+    """
+    Abstract class for quantification methods that base their estimations on the aggregation of classification
+    results. Aggregative Quantifiers thus implement a _classify_ method and maintain a _learner_ attribute.
+    """
+
+    @abstractmethod
+    def fit(self, data: LabelledCollection, fit_learner=True, *args): ...
+
+    def classify(self, documents):
+        return self.learner.predict(documents)
+
+    def get_params(self, deep=True):
+        return self.learner.get_params()
+
+    def set_params(self, **parameters):
+        self.learner.set_params(**parameters)
+
+    @property
+    def n_classes(self):
+        return len(self.classes)
+
+    @property
+    def classes(self):
+        return self.learner.classes_
+
+
+class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
+    """
+    Abstract class for quantification methods that base their estimations on the aggregation of posterior
+    probabilities as returned by a probabilistic classifier. Aggregative Probabilistic Quantifiers thus extend
+    Aggregative Quantifiers by implementing a _soft_classify_ method returning values in [0,1] -- the posterior
+    probabilities.
+    """
+
+    def soft_classify(self, data):
+        return self.learner.predict_proba(data)
+
+    def set_params(self, **parameters):
+        if isinstance(self.learner, CalibratedClassifierCV):
+            parameters = {'base_estimator__' + k: v for k, v in parameters.items()}
+        self.learner.set_params(**parameters)
+
+
+# Helper
+# ------------------------------------
+def training_helper(learner,
+                    data: LabelledCollection,
+                    fit_learner: bool = True,
+                    ensure_probabilistic=False,
+                    train_val_split=None):
+    """
+    Training procedure common to all Aggregative Quantifiers.
+    :param learner: the learner to be fit
+    :param data: the data on which to fit the learner. If requested, the data will be split before fitting the learner.
+    :param fit_learner: whether or not to fit the learner
+    :param ensure_probabilistic: if True, guarantees that the resulting classifier implements predict_proba (if the
+    learner is not probabilistic, then a CalibratedClassifierCV instance of it is trained)
+    :param train_val_split: if specified, indicates the proportion of training documents on which to fit the learner
+    :return: the learner trained on the training set, and the unused data (a _LabelledCollection_ if train_val_split
+    was specified, or None otherwise)
+    """
+    if fit_learner:
+        if ensure_probabilistic:
+            if not hasattr(learner, 'predict_proba'):
+                print(f'The learner {learner.__class__.__name__} does not seem to be probabilistic. '
+                      f'The learner will be calibrated.')
+                learner = CalibratedClassifierCV(learner, cv=5)
+        if train_val_split is not None:
+            if not (0 < train_val_split < 1):
+                raise ValueError(f'train/val split {train_val_split} out of range, must be in (0,1)')
+            train, unused = data.split_stratified(train_prop=train_val_split)
+        else:
+            train, unused = data, None
+        learner.fit(train.instances, train.labels)
+    else:
+        if ensure_probabilistic:
+            if not hasattr(learner, 'predict_proba'):
+                raise AssertionError('error: the learner cannot be calibrated since fit_learner is set to False')
+        unused = data
+
+    return learner, unused
+
+
+# Methods
+# ------------------------------------
+class ClassifyAndCount(AggregativeQuantifier):
+    """
+    The most basic quantification method: it simply classifies all instances and counts how many have been
+    attributed to each of the classes, in order to compute class prevalence estimates.
+    """
+
+    def __init__(self, learner):
+        self.learner = learner
+
+    def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        """
+        Trains the Classify & Count method unless _fit_learner_ is False, in which case the classifier is assumed
+        to be already fit.
+        :param data: training data
+        :param fit_learner: if False, the classifier is assumed to be fit
+        :param args: unused
+        :return: self
+        """
+        self.learner, _ = training_helper(self.learner, data, fit_learner)
+        return self
+
+    def quantify(self, documents, *args):
+        classification = self.classify(documents)                         # classify
+        return F.prevalence_from_labels(classification, self.n_classes)   # & count
+
+
+class AdjustedClassifyAndCount(AggregativeQuantifier):
+
+    def __init__(self, learner):
+        self.learner = learner
+
+    def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
+        self.learner, validation = training_helper(self.learner, data, fit_learner, train_val_split=train_val_split)
+        self.cc = ClassifyAndCount(self.learner)
+        y_ = self.cc.classify(validation.instances)
+        y = validation.labels
+        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
+        # document that belongs to yj ends up being classified as belonging to yi
+        self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
+        return self
+
+    def quantify(self, documents, *args):
+        prevs_estim = self.cc.quantify(documents)
+        # solve the linear system Ax = B with A = Pte_cond_estim_ and B = prevs_estim
+        A = self.Pte_cond_estim_
+        B = prevs_estim
+        try:
+            adjusted_prevs = np.linalg.solve(A, B)
+            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
+            adjusted_prevs /= adjusted_prevs.sum()
+        except np.linalg.LinAlgError:
+            adjusted_prevs = prevs_estim  # no way to adjust them!
+        return adjusted_prevs
+
+    def classify(self, data):
+        return self.cc.classify(data)
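+
+# Note on the adjustment above: in the binary case the linear system reduces to
+# the classic ACC correction. Writing tpr = P(y_=1|y=1) and fpr = P(y_=1|y=0),
+# the second row of Pte_cond_estim_ gives
+#     prevs_estim[1] = tpr * p + fpr * (1 - p)
+# and solving for p (the prevalence of the positive class) yields
+#     p = (prevs_estim[1] - fpr) / (tpr - fpr)
+# which is exactly functional.adjusted_quantification; np.linalg.solve
+# generalizes this correction to any number of classes.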
+
+
+class ProbabilisticClassifyAndCount(AggregativeProbabilisticQuantifier):
+    def __init__(self, learner):
+        self.learner = learner
+
+    def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
+        return self
+
+    def quantify(self, documents, *args):
+        posteriors = self.soft_classify(documents)                                   # classify
+        prevalences = F.prevalence_from_probabilities(posteriors, binarize=False)    # & count
+        return prevalences
+
+
+class ProbabilisticAdjustedClassifyAndCount(AggregativeQuantifier):
+
+    def __init__(self, learner):
+        self.learner = learner
+
+    def fit(self, data: LabelledCollection, fit_learner=True, train_val_split=0.6):
+        self.learner, validation = training_helper(
+            self.learner, data, fit_learner, ensure_probabilistic=True, train_val_split=train_val_split
+        )
+        self.pcc = ProbabilisticClassifyAndCount(self.learner)
+        y_ = self.pcc.classify(validation.instances)
+        y = validation.labels
+        # estimate the matrix with entry (i,j) being the estimate of P(yi|yj), that is, the probability that a
+        # document that belongs to yj ends up being classified as belonging to yi
+        self.Pte_cond_estim_ = confusion_matrix(y, y_).T / validation.counts()
+        return self
+
+    def quantify(self, documents, *args):
+        prevs_estim = self.pcc.quantify(documents)
+        A = self.Pte_cond_estim_
+        B = prevs_estim
+        try:
+            adjusted_prevs = np.linalg.solve(A, B)
+            adjusted_prevs = np.clip(adjusted_prevs, 0, 1)
+            adjusted_prevs /= adjusted_prevs.sum()
+        except np.linalg.LinAlgError:
+            adjusted_prevs = prevs_estim  # no way to adjust them!
+        return adjusted_prevs
+
+    def classify(self, data):
+        return self.pcc.classify(data)
+
+
+class ExpectationMaximizationQuantifier(AggregativeProbabilisticQuantifier):
+
+    MAX_ITER = 1000
+    EPSILON = 1e-4
+
+    def __init__(self, learner, verbose=False):
+        self.learner = learner
+        self.verbose = verbose
+
+    def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        self.learner, _ = training_helper(self.learner, data, fit_learner, ensure_probabilistic=True)
+        self.train_prevalence = F.prevalence_from_labels(data.labels, self.n_classes)
+        return self
+
+    def quantify(self, X, epsilon=EPSILON):
+        tr_prev = self.train_prevalence
+        posteriors = self.soft_classify(X)
+        return self.EM(tr_prev, posteriors, self.verbose, epsilon)
+
+    @classmethod
+    def EM(cls, tr_prev, posterior_probabilities, verbose=False, epsilon=EPSILON):
+        Px = posterior_probabilities
+        Ptr = np.copy(tr_prev)
+        qs = np.copy(Ptr)  # qs (the running estimate) is initialized as the training prevalence
+
+        s, converged = 0, False
+        qs_prev_ = None
+        while not converged and s < ExpectationMaximizationQuantifier.MAX_ITER:
+            # E-step: ps is P_s(y|xi), the posteriors re-weighted by the current prevalence estimate
+            ps_unnormalized = (qs / Ptr) * Px
+            ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1, 1)
+
+            # M-step: qs_{s+1} is the mean of the re-weighted posteriors
+            qs = ps.mean(axis=0)
+
+            if qs_prev_ is not None and mae(qs, qs_prev_) < epsilon and s > 10:
+                converged = True
+
+            qs_prev_ = qs
+            s += 1
+
+        if verbose:
+            print('-' * 80)
+
+        if not converged:
+            warnings.warn('the method has reached the maximum number of iterations; it might have not converged')
+
+        return qs
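+
+# Usage sketch (requires a probabilistic learner, or one that will be
+# calibrated; names are illustrative):
+#
+#     from sklearn.linear_model import LogisticRegression
+#     emq = ExpectationMaximizationQuantifier(LogisticRegression())
+#     emq.fit(train)                        # also stores the training prevalence
+#     prevs = emq.quantify(test.instances)  # runs EM over the test posteriors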
+
+
+# todo: from here
+def train_task(c, learners, data):
+    learners[c].fit(data.instances, data.labels == c)
+
+
+def binary_quant_task(c, learners, X):
+    predictions_ci = learners[c].predict(X)
+    return predictions_ci.mean()  # since the predictions array is binary
+
+
+class OneVsAllELM(AggregativeQuantifier):
+
+    def __init__(self, svmperf_base, loss, n_jobs=-1, **kwargs):
+        self.svmperf_base = svmperf_base
+        self.loss = loss
+        self.n_jobs = n_jobs
+        self.kwargs = kwargs
+
+    def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        assert fit_learner, 'the method requires that fit_learner=True'
+
+        self.learners = {c: SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs) for c in data.classes_}
+        Parallel(n_jobs=self.n_jobs, backend='threading')(
+            delayed(train_task)(c, self.learners, data) for c in self.learners.keys()
+        )
+        return self
+
+    def quantify(self, X, y=None):
+        prevalences = np.asarray(
+            Parallel(n_jobs=self.n_jobs, backend='threading')(
+                delayed(binary_quant_task)(c, self.learners, X) for c in self.learners.keys()
+            )
+        )
+        prevalences /= prevalences.sum()
+        return prevalences
+
+    @property
+    def classes(self):
+        return sorted(self.learners.keys())
+
+    def preclassify_collection(self, data: LabelledCollection):
+        classifications = []
+        for class_ in data.classes_:
+            classifications.append(self.learners[class_].predict(data.instances))
+        classifications = np.vstack(classifications).T
+        precomputed = LabelledCollection(classifications, data.labels)
+        return precomputed
+
+    def set_params(self, **parameters):
+        self.kwargs = parameters
+
+    def get_params(self, deep=True):
+        return self.kwargs
+
+
+class ExplicitLossMinimisation(AggregativeQuantifier):
+
+    def __init__(self, svmperf_base, loss, **kwargs):
+        self.learner = SVMperf(svmperf_base, loss=loss, **kwargs)
+
+    def fit(self, data: LabelledCollection, fit_learner=True, *args):
+        assert fit_learner, 'the method requires that fit_learner=True'
+        self.learner.fit(data.instances, data.labels)
+        return self
+
+    def quantify(self, X, y=None):
+        predictions = self.learner.predict(X)
+        return F.prevalence_from_labels(predictions, self.learner.n_classes_)
+
+    def classify(self, X, y=None):
+        return self.learner.predict(X)
+
+
+class SVMQ(ExplicitLossMinimisation):
+    def __init__(self, svmperf_base, **kwargs):
+        super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
+
+
+class SVMKLD(ExplicitLossMinimisation):
+    def __init__(self, svmperf_base, **kwargs):
+        super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
+
+
+class SVMNKLD(ExplicitLossMinimisation):
+    def __init__(self, svmperf_base, **kwargs):
+        super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
+
+
+class SVMAE(ExplicitLossMinimisation):
+    def __init__(self, svmperf_base, **kwargs):
+        super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
+
+
+class SVMRAE(ExplicitLossMinimisation):
+    def __init__(self, svmperf_base, **kwargs):
+        super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
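+
+# Usage sketch for the ELM wrappers (assumes prepare_svmperf.sh has been run,
+# so that ./svm_perf_quantification contains the patched binaries; the path is
+# illustrative):
+#
+#     svmq = SVMQ(svmperf_base='./svm_perf_quantification')
+#     svmq.fit(train)                        # trains svm_perf with the Q loss
+#     prevs = svmq.quantify(test.instances)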
diff --git a/quapy/method/base.py b/quapy/method/base.py
new file mode 100644
index 0000000..4679a8f
--- /dev/null
+++ b/quapy/method/base.py
@@ -0,0 +1,21 @@
+from abc import ABCMeta, abstractmethod
+import quapy as qp
+
+
+# Base Quantifier abstract class
+# ------------------------------------
+class BaseQuantifier(metaclass=ABCMeta):
+
+    @abstractmethod
+    def fit(self, data: qp.LabelledCollection, *args): ...
+
+    @abstractmethod
+    def quantify(self, documents, *args): ...
+
+    @abstractmethod
+    def set_params(self, **parameters): ...
+
+    @abstractmethod
+    def get_params(self, deep=True): ...
+
+
diff --git a/quapy/method/non_aggregative.py b/quapy/method/non_aggregative.py
new file mode 100644
index 0000000..1bd7c89
--- /dev/null
+++ b/quapy/method/non_aggregative.py
@@ -0,0 +1,21 @@
+from quapy import LabelledCollection
+from .base import BaseQuantifier
+
+
+class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
+
+    def __init__(self, **kwargs):
+        pass
+
+    def fit(self, data: LabelledCollection, *args):
+        self.estimated_prevalence = data.prevalence()
+        return self
+
+    def quantify(self, documents, *args):
+        return self.estimated_prevalence
+
+    def get_params(self, deep=True):
+        pass
+
+    def set_params(self, **parameters):
+        pass
diff --git a/svm-perf-quantification-ext.patch b/svm-perf-quantification-ext.patch
new file mode 100644
index 0000000..df59328
--- /dev/null
+++ b/svm-perf-quantification-ext.patch
@@ -0,0 +1,540 @@
+diff -ruN svm_perf/svm_struct/svm_struct_main.c svm_perf_quantification/svm_struct/svm_struct_main.c
+--- svm_perf/svm_struct/svm_struct_main.c	2020-10-28 12:23:19.000000000 +0100
++++ svm_perf_quantification/svm_struct/svm_struct_main.c	2020-10-28 12:23:53.000000000 +0100
+@@ -128,7 +128,8 @@
+   struct_parm->newconstretrain=100;
+   struct_parm->ccache_size=5;
+   struct_parm->batch_size=100;
+-
++  struct_parm->loss_parm=1.0;
++  struct_parm->beta=1.0; // AIC-QBETA
+   strcpy (modelfile, "svm_struct_model");
+   strcpy (learn_parm->predfile, "trans_predictions");
+   strcpy (learn_parm->alphafile, "");
+@@ -170,6 +171,7 @@
+       case 'p': i++; struct_parm->slack_norm=atol(argv[i]); break;
+       case 'e': i++; struct_parm->epsilon=atof(argv[i]); break;
+       case 'k': i++; struct_parm->newconstretrain=atol(argv[i]); break;
++      case 'j': i++; struct_parm->beta=atof(argv[i]); break; // AIC-QBETA
+       case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break;
+       case '#': i++; learn_parm->maxiter=atol(argv[i]); break;
+       case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break;
+@@ -189,6 +191,7 @@
+       case '-': strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);i++; strcpy(struct_parm->custom_argv[struct_parm->custom_argc++],argv[i]);break;
+       case 'v': i++; (*struct_verbosity)=atol(argv[i]); break;
+       case 'y': i++; (*verbosity)=atol(argv[i]); break;
++      case '!': i++; struct_parm->loss_parm=atof(argv[i]); break;
+       default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+                print_help();
+                exit(0);
+@@ -396,6 +399,9 @@
+   printf("         (in the same order as in the training set)\n");
+   printf("Application-Specific Options:\n");
+   print_struct_help();
++  printf("*************************************************\n"); // AIC-QBETA
++  printf(" -j float    -> parameter beta for qbeta-based loss functions (default: 1.0)\n");
++  printf("*************************************************\n");
+   wait_any_key();
+
+   printf("\nMore details in:\n");
+diff -ruN svm_perf/svm_struct_api.c svm_perf_quantification/svm_struct_api.c
+--- svm_perf/svm_struct_api.c	2020-10-28 12:23:19.000000000 +0100
++++ svm_perf_quantification/svm_struct_api.c	2020-10-28 12:23:53.000000000 +0100
+@@ -20,6 +20,7 @@
+ #include
+ #include
+ #include
++#include <math.h>
+ #include "svm_struct_api.h"
+ #include "svm_light/svm_common.h"
+ #include "svm_struct/svm_struct_common.h"
+@@ -27,7 +28,9 @@
+ #define MAX(x,y) ((x) < (y) ? (y) : (x))
+ #define MIN(x,y) ((x) > (y) ? (y) : (x))
++#define ABS(x) ((x) < (0) ? (-(x)) : (x))
+ #define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ?
(-1) : (0)))) ++#define PI (3.141592653589793) + + int compareup(const void *a, const void *b) + { +@@ -72,6 +75,16 @@ + double rocarea(LABEL y, LABEL ybar); + double prbep(LABEL y, LABEL ybar); + double avgprec(LABEL y, LABEL ybar); ++/* AIC-QBETA */ ++double gm(int a, int b, int c, int d); ++double nae(int a, int b, int c, int d); ++double ae(int a, int b, int c, int d); ++double rae(int a, int b, int c, int d); ++double Qbeta(int a, int b, int c, int d, double beta); ++double Qbeta_acc(int a, int b, int c, int d, double beta); ++double Qbeta_f1(int a, int b, int c, int d, double beta); ++double Qbeta_gm(int a, int b, int c, int d, double beta); ++/* AIC-QBETA */ + + double zeroone_loss(int a, int b, int c, int d); + double fone_loss(int a, int b, int c, int d); +@@ -82,6 +95,23 @@ + double swappedpairs_loss(LABEL y, LABEL ybar); + double avgprec_loss(LABEL y, LABEL ybar); + ++double kldiv(int a, int b, int c, int d); // KLD ++double kldiv_loss(int a, int b, int c, int d); // KLD ++double nkldiv_loss(int a, int b, int c, int d); // KLD ++ ++double milli_loss(int a, int b, int c, int d); //MILL ++ ++/* AIC-QBETA */ ++double gm_loss(int a, int b, int c, int d); ++double nae_loss(int a, int b, int c, int d); ++double ae_loss(int a, int b, int c, int d); ++double rae_loss(int a, int b, int c, int d); ++double Qbeta_loss(int a, int b, int c, int d, double beta); ++double Qbeta_acc_loss(int a, int b, int c, int d, double beta); ++double Qbeta_f1_loss(int a, int b, int c, int d, double beta); ++double Qbeta_gm_loss(int a, int b, int c, int d, double beta); ++/* AIC-QBETA */ ++ + void svm_struct_learn_api_init(int argc, char* argv[]) + { + /* Called in learning part before anything else is done to allow +@@ -181,10 +211,22 @@ + /* change label value for better scaling using thresholdmetrics */ + if((sparm->loss_function == ZEROONE) + || (sparm->loss_function == FONE) ++ || (sparm->loss_function == GM) // AIC-QBETA ++ || (sparm->loss_function == NAE) // AIC-QBETA ++ || (sparm->loss_function == AE) // AIC-QBETA ++ || (sparm->loss_function == RAE) // AIC-QBETA ++ || (sparm->loss_function == QBETA) // AIC-QBETA ++ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA ++ || (sparm->loss_function == QBETA_F1) // AIC-QBETA ++ || (sparm->loss_function == QBETA_GM) // AIC-QBETA + || (sparm->loss_function == ERRORRATE) + || (sparm->loss_function == PRBEP) + || (sparm->loss_function == PREC_K) +- || (sparm->loss_function == REC_K)) { ++ || (sparm->loss_function == REC_K) ++ || (sparm->loss_function == KLD) ++ || (sparm->loss_function == NKLD) ++ || (sparm->loss_function == MILLI) ++ ) { + for(i=0;i0) + sample.examples[0].y.class[i]=0.5*100.0/(numn+nump); +@@ -520,10 +562,22 @@ + LABEL ybar; + if((sparm->loss_function == ZEROONE) + || (sparm->loss_function == FONE) ++ || (sparm->loss_function == GM) // AIC-QBETA ++ || (sparm->loss_function == NAE) // AIC-QBETA ++ || (sparm->loss_function == AE) // AIC-QBETA ++ || (sparm->loss_function == RAE) // AIC-QBETA ++ || (sparm->loss_function == QBETA) // AIC-QBETA ++ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA ++ || (sparm->loss_function == QBETA_F1) // AIC-QBETA ++ || (sparm->loss_function == QBETA_GM) // AIC-QBETA + || (sparm->loss_function == ERRORRATE) + || (sparm->loss_function == PRBEP) + || (sparm->loss_function == PREC_K) +- || (sparm->loss_function == REC_K)) { ++ || (sparm->loss_function == REC_K) ++ || (sparm->loss_function == KLD) ++ || (sparm->loss_function == NKLD) ++ || (sparm->loss_function == MILLI) ++ ) { + 
ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm, + sparm->loss_type); + } +@@ -562,9 +616,21 @@ + sparm->loss_type); */ + else if((sparm->loss_function == ZEROONE) + || (sparm->loss_function == FONE) ++ || (sparm->loss_function == GM) // AIC-QBETA ++ || (sparm->loss_function == NAE) // AIC-QBETA ++ || (sparm->loss_function == AE) // AIC-QBETA ++ || (sparm->loss_function == RAE) // AIC-QBETA ++ || (sparm->loss_function == QBETA) // AIC-QBETA ++ || (sparm->loss_function == QBETA_ACC) // AIC-QBETA ++ || (sparm->loss_function == QBETA_F1) // AIC-QBETA ++ || (sparm->loss_function == QBETA_GM) // AIC-QBETA + || (sparm->loss_function == PRBEP) + || (sparm->loss_function == PREC_K) +- || (sparm->loss_function == REC_K)) ++ || (sparm->loss_function == REC_K) ++ || (sparm->loss_function == KLD) ++ || (sparm->loss_function == NKLD) ++ || (sparm->loss_function == MILLI) ++ ) + ybar=find_most_violated_constraint_thresholdmetric(x,y,sm,sparm, + sparm->loss_type); + else if((sparm->loss_function == SWAPPEDPAIRS)) +@@ -741,7 +807,23 @@ + if(sparm->loss_function == ZEROONE) + loss=zeroone_loss(a,numn-d,nump-a,d); + else if(sparm->loss_function == FONE) +- loss=fone_loss(a,numn-d,nump-a,d); ++ loss=fone_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == GM) // AIC-QBETA ++ loss=gm_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == NAE) // AIC-QBETA ++ loss=nae_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == AE) // AIC-QBETA ++ loss=ae_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == RAE) // AIC-QBETA ++ loss=rae_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == QBETA) // AIC-QBETA ++ loss=Qbeta_loss(a,numn-d,nump-a,d,sparm->beta); ++ else if(sparm->loss_function == QBETA_ACC) // AIC-QBETA ++ loss=Qbeta_acc_loss(a,numn-d,nump-a,d,sparm->beta); ++ else if(sparm->loss_function == QBETA_F1) // AIC-QBETA ++ loss=Qbeta_f1_loss(a,numn-d,nump-a,d,sparm->beta); ++ else if(sparm->loss_function == QBETA_GM) // AIC-QBETA ++ loss=Qbeta_gm_loss(a,numn-d,nump-a,d,sparm->beta); + else if(sparm->loss_function == ERRORRATE) + loss=errorrate_loss(a,numn-d,nump-a,d); + else if((sparm->loss_function == PRBEP) && (a+numn-d == nump)) +@@ -750,6 +832,12 @@ + loss=prec_k_loss(a,numn-d,nump-a,d); + else if((sparm->loss_function == REC_K) && (a+numn-d <= prec_rec_k)) + loss=rec_k_loss(a,numn-d,nump-a,d); ++ else if(sparm->loss_function == KLD) //KLD ++ loss=kldiv_loss(a,numn-d,nump-a,d); //KLD ++ else if(sparm->loss_function == NKLD) //KLD ++ loss=nkldiv_loss(a,numn-d,nump-a,d); //KLD ++ else if(sparm->loss_function == MILLI) //MILLI ++ loss=milli_loss(a,numn-d,nump-a,d); //MILLI + else { + loss=0; + } +@@ -1213,6 +1301,7 @@ + } + /* printf("%f %f\n",y.class[i],ybar.class[i]); */ + } ++ //printf("********** loss %d (a,b,c,d) (%d,%d,%d,%d) beta=%f\n",sparm->loss_function,a,b,c,d,sparm->beta); + /* Return the loss according to the selected loss function. */ + if(sparm->loss_function == ZEROONE) { /* type 0 loss: 0/1 loss */ + /* return 0, if y==ybar. 
return 1 else */ +@@ -1221,6 +1310,30 @@ + else if(sparm->loss_function == FONE) { + loss=fone_loss(a,b,c,d); + } ++ else if(sparm->loss_function == GM) { // AIC-QBETA ++ loss=gm_loss(a,b,c,d); ++ } ++ else if(sparm->loss_function == NAE) { // AIC-QBETA ++ loss=nae_loss(a,b,c,d); ++ } ++ else if(sparm->loss_function == AE) { // AIC-QBETA ++ loss=ae_loss(a,b,c,d); ++ } ++ else if(sparm->loss_function == RAE) { // AIC-QBETA ++ loss=rae_loss(a,b,c,d); ++ } ++ else if(sparm->loss_function == QBETA) { // AIC-QBETA ++ loss=Qbeta_loss(a,b,c,d,sparm->beta); ++ } ++ else if(sparm->loss_function == QBETA_ACC) { // AIC-QBETA ++ loss=Qbeta_acc_loss(a,b,c,d,sparm->beta); ++ } ++ else if(sparm->loss_function == QBETA_F1) { // AIC-QBETA ++ loss=Qbeta_f1_loss(a,b,c,d,sparm->beta); ++ } ++ else if(sparm->loss_function == QBETA_GM) { // AIC-QBETA ++ loss=Qbeta_gm_loss(a,b,c,d,sparm->beta); ++ } + else if(sparm->loss_function == ERRORRATE) { + loss=errorrate_loss(a,b,c,d); + } +@@ -1242,6 +1355,15 @@ + else if(sparm->loss_function == AVGPREC) { + loss=avgprec_loss(y,ybar); + } ++ else if(sparm->loss_function == KLD) { //KLD ++ loss=kldiv_loss(a,b,c,d); //KLD ++ } //KLD ++ else if(sparm->loss_function == NKLD) { //KLD ++ loss=nkldiv_loss(a,b,c,d); //KLD ++ } //KLD ++ else if(sparm->loss_function == MILLI) { //MILLI ++ loss=milli_loss(a,b,c,d); //MILLI ++ } //MILLI + else { + /* Put your code for different loss functions here. But then + find_most_violated_constraint_???(x, y, sm) has to return the +@@ -1320,6 +1442,16 @@ + printf("PRBEP : %5.2f\n",teststats->prbep); + printf("ROCArea : %5.2f\n",teststats->rocarea); + printf("AvgPrec : %5.2f\n",teststats->avgprec); ++ printf("Qb : %5.2f\n",teststats->Qbeta); ++ printf("Qb (Acc) : %5.2f\n",teststats->Qbeta_acc); ++ printf("Qb (F1) : %5.2f\n",teststats->Qbeta_f1); ++ printf("Qb (GM) : %5.2f\n",teststats->Qbeta_gm); ++ printf("NAE : %5.2f\n",teststats->nae); ++ printf("AE : %5.2f\n",teststats->ae); ++ printf("RAE : %5.2f\n",teststats->rae); ++ printf("GM : %5.2f\n",teststats->gm); ++ printf("KLD : %5.2f\n",teststats->kld); ++ printf("NKLD : %5.2f\n",teststats->nkld); + } + else { + printf("NOTE: %ld test examples are unlabeled, so performance cannot be computed. 
The\n",teststats->test_data_unlabeled); +@@ -1352,6 +1484,29 @@ + teststats->recall=100.0-loss(ex.y,ypred,sparm); + sparm->loss_function=FONE; + teststats->fone=100.0-loss(ex.y,ypred,sparm); ++ ++ sparm->loss_function=GM; // AIC-QBETA ++ teststats->gm=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=NAE; // AIC-QBETA ++ teststats->nae=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=AE; // AIC-QBETA ++ teststats->ae=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=RAE; // AIC-QBETA ++ teststats->rae=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=QBETA; // AIC-QBETA ++ teststats->Qbeta=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=QBETA_ACC; // AIC-QBETA ++ teststats->Qbeta_acc=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=QBETA_F1; // AIC-QBETA ++ teststats->Qbeta_f1=100.0-loss(ex.y,ypred,sparm); ++ sparm->loss_function=QBETA_GM; // AIC-QBETA ++ teststats->Qbeta_gm=100.0-loss(ex.y,ypred,sparm); ++ ++ sparm->loss_function=KLD; // KLD ++ teststats->kld=100-loss(ex.y,ypred,sparm); ++ sparm->loss_function=NKLD; // KLD ++ teststats->nkld=100.0-loss(ex.y,ypred,sparm); ++ + teststats->prbep=prbep(ex.y,ypred); + teststats->rocarea=rocarea(ex.y,ypred); + teststats->avgprec=avgprec(ex.y,ypred); +@@ -1403,6 +1558,7 @@ + STRUCTMODEL sm; + + sm.svm_model=read_model(file); ++ sparm->beta = 1; // AIC-QBETA ***************************** + sparm->loss_function=ERRORRATE; + sparm->bias=0; + sparm->bias_featurenum=0; +@@ -1514,6 +1670,18 @@ + printf(" %2d Prec@k: 100 minus precision at k in percent.\n",PREC_K); + printf(" %2d Rec@k: 100 minus recall at k in percent.\n",REC_K); + printf(" %2d ROCArea: Percentage of swapped pos/neg pairs (i.e. 100 - ROCArea).\n\n",SWAPPEDPAIRS); ++ printf(" %2d Kullback-Leibler divergence.\n",KLD); //KLD ++ printf(" %2d Normalized Kullback-Leibler divergence.\n",NKLD); //KLD ++ printf(" %2d MILLI.\n",MILLI); //MILLI ++ printf(" %2d GM: geometric mean of tpr and tnr\n",GM); // AIC-QBETA ++ printf(" %2d NAE: normalized absolute error (Esuli & Sebastiani)\n",NAE); // AIC-QBETA ++ printf(" %2d AE: absolute error (Esuli & Sebastiani)\n",AE); // AIC-QBETA ++ printf(" %2d RAE: relative absolute error (Esuli & Sebastiani)\n",RAE); // AIC-QBETA ++ printf(" %2d Qbeta: 100 minus the Qbeta-score in percent (with recall)\n",QBETA); // AIC-QBETA ++ printf(" %2d Qbeta_acc: 100 minus the Qbeta-score in percent (with acc)\n",QBETA_ACC); // AIC-QBETA ++ printf(" %2d Qbeta_f1: 100 minus the Qbeta-score in percent (with F1)\n",QBETA_F1); // AIC-QBETA ++ printf(" %2d Qbeta_gm: 100 minus the Qbeta-score in percent (with GM)\n",QBETA_GM); // AIC-QBETA ++ + printf("NOTE: The '-c' parameters in SVM-light and SVM-perf are related as\n"); + printf(" c_light = c_perf*100/n for the 'Errorrate' loss function, where n is the\n"); + printf(" number of training examples.\n\n"); +@@ -1785,7 +1953,65 @@ + free(predset); + return(100.0*(apr/(double)(nump))); + } +- ++/* AIC-QBETA */ ++double tnr(int a, int b, int c, int d) ++{ ++ /* Returns tnr as fractional value. */ ++ if((b+d) == 0) return(0.0); ++ return((double)d/(double)(b+d)); ++} ++double gm(int a, int b, int c, int d) ++{ ++ double tprate = rec(a,b,c,d); ++ double tnrate = tnr(a,b,c,d); ++ return sqrt( tprate * tnrate ); ++} ++double nae(int a, int b, int c, int d) ++{ ++ double maximo = (a+c > b+d? 
a+c: b+d); ++ return 1.0 - ( (double)abs(c-b) / maximo); ++ //return 1.0 - ( (double)abs(c-b) / (double)(a+b+c+d)); // ABSERR ++} ++double ae(int a, int b, int c, int d) ++{ ++ return (double)abs(c-b) / (double)(a+b+c+d) ; // ABSERR ++} ++double rae(int a, int b, int c, int d) ++{ ++ double absdif = (double)abs(c-b) ; ++ double smooth_rae_pos = absdif / ((double)(a+c+0.5)) ; ++ double smooth_rae_neg = absdif / ((double)(b+d+0.5)) ; ++ return 0.5*(smooth_rae_pos + smooth_rae_neg) ; ++} ++double Qbeta(int a, int b, int c, int d, double beta) ++{ ++ if(a+c == 0) return(0.0); ++ double qperf=nae(a,b,c,d); ++ double cperf=rec(a,b,c,d); ++ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf)); ++} ++double Qbeta_acc(int a, int b, int c, int d, double beta) ++{ ++ if(a+c == 0) return(0.0); ++ double qperf=nae(a,b,c,d); ++ double cperf=1.0-errorrate(a,b,c,d); ++ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf)); ++} ++double Qbeta_f1(int a, int b, int c, int d, double beta) ++{ ++ if(a+c == 0) return(0.0); ++ double qperf=nae(a,b,c,d); ++ double cperf=fone(a,b,c,d); ++ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf)); ++} ++double Qbeta_gm(int a, int b, int c, int d, double beta) ++{ ++ if(a+c == 0) return(0.0); ++ double qperf=nae(a,b,c,d); ++ double cperf=gm(a,b,c,d); ++ return((1+beta*beta)*qperf*cperf/((beta*beta*cperf)+qperf)); ++} ++/* AIC-QBETA */ + /*------- Loss functions based on performance measures --------*/ + + double zeroone_loss(int a, int b, int c, int d) +@@ -1846,4 +2072,70 @@ + } + + ++//KLD ++double kldiv(int a, int b, int c, int d) ++{ ++ double sum = a+b+c+d+1.0; ++ double pab = (a+b+0.5)/sum; ++ double pac = (a+c+0.5)/sum; ++ double pbd = (b+d+0.5)/sum; ++ double pcd = (c+d+0.5)/sum; ++ ++ double kl = pac*log(pac/pab)+pbd*log(pbd/pcd); ++ ++ return kl; ++} ++ ++//KLD ++double kldiv_loss(int a, int b, int c, int d) ++{ ++ return kldiv(a,b,c,d); ++} ++ ++//KLD ++double nkldiv_loss(int a, int b, int c, int d) ++{ ++ return 100.0-(100.0*2.0/(1.0+exp(kldiv(a,b,c,d)))); ++} ++ ++//MILLI ++double milli_loss(int a, int b, int c, int d) ++{ ++ int sum = a+b+c+d; ++ return 100.0*(b+c)*ABS(b-c); ++} + ++/* AIC-QBETA */ ++double gm_loss(int a, int b, int c, int d) ++{ ++ return 100.0 * (1.0-gm(a,b,c,d)); ++} ++double nae_loss(int a, int b, int c, int d) ++{ ++ return 100.0 * (1.0-nae(a,b,c,d)); ++} ++double ae_loss(int a, int b, int c, int d) ++{ ++ return 100.0 * ae(a,b,c,d); ++} ++double rae_loss(int a, int b, int c, int d) ++{ ++ return 100.0 * rae(a,b,c,d); ++} ++double Qbeta_loss(int a, int b, int c, int d,double beta) ++{ ++ return(100.0*(1.0-Qbeta(a,b,c,d,beta))); ++} ++double Qbeta_acc_loss(int a, int b, int c, int d,double beta) ++{ ++ return(100.0*(1.0-Qbeta_acc(a,b,c,d,beta))); ++} ++double Qbeta_f1_loss(int a, int b, int c, int d,double beta) ++{ ++ return(100.0*(1.0-Qbeta_f1(a,b,c,d,beta))); ++} ++double Qbeta_gm_loss(int a, int b, int c, int d,double beta) ++{ ++ return(100.0*(1.0-Qbeta_gm(a,b,c,d,beta))); ++} ++/* AIC-QBETA */ +diff -ruN svm_perf/svm_struct_api_types.h svm_perf_quantification/svm_struct_api_types.h +--- svm_perf/svm_struct_api_types.h 2020-10-28 12:23:19.000000000 +0100 ++++ svm_perf_quantification/svm_struct_api_types.h 2020-10-28 12:23:53.000000000 +0100 +@@ -28,14 +28,25 @@ + # define INST_VERSION_DATE "15.07.2009" + + /* Identifiers for loss functions */ +-#define ZEROONE 0 +-#define FONE 1 +-#define ERRORRATE 2 +-#define PRBEP 3 +-#define PREC_K 4 +-#define REC_K 5 +-#define SWAPPEDPAIRS 10 +-#define AVGPREC 11 ++#define 
ZEROONE 0 ++#define FONE 1 ++#define ERRORRATE 2 ++#define PRBEP 3 ++#define PREC_K 4 ++#define REC_K 5 ++#define SWAPPEDPAIRS 10 ++#define AVGPREC 11 ++#define KLD 12 //KLD ++#define NKLD 13 //KLD ++#define MILLI 16 //MILLI ++#define GM 20 // AIC-QBETA ++#define NAE 21 // AIC-QBETA ++#define QBETA 22 // AIC-QBETA ++#define QBETA_ACC 23 // AIC-QBETA ++#define QBETA_F1 24 // AIC-QBETA ++#define QBETA_GM 25 // AIC-QBETA ++#define AE 26 // AIC-QBETA ++#define RAE 27 // AIC-QBETA + + /* default precision for solving the optimization problem */ + # define DEFAULT_EPS 0.1 +@@ -169,6 +180,8 @@ + svm_perf_classify. This uses more + memory, but is faster if the support + vectors in the model are dense. */ ++ double loss_parm; ++ double beta; /* AIC-QBETA */ + } STRUCT_LEARN_PARM; + + typedef struct struct_test_stats { +@@ -183,6 +196,16 @@ + double prbep; + double rocarea; + double avgprec; ++ double kld; //KLD ++ double nkld; //KLD ++ double gm; // AIC-QBETA ++ double nae; // AIC-QBETA ++ double ae; // AIC-QBETA ++ double rae; // AIC-QBETA ++ double Qbeta; // AIC-QBETA ++ double Qbeta_acc; // AIC-QBETA ++ double Qbeta_f1; // AIC-QBETA ++ double Qbeta_gm; // AIC-QBETA + } STRUCT_TEST_STATS; + + typedef struct struct_id_score {