elm examples

Alejandro Moreo Fernandez 2023-02-13 12:01:52 +01:00
parent 4c74ff02a3
commit 505d2de823
7 changed files with 214 additions and 192 deletions

View File

@@ -0,0 +1,72 @@
import quapy as qp
from quapy.method.aggregative import newELM
from quapy.method.base import newOneVsAll
from quapy.model_selection import GridSearchQ
from quapy.protocol import USimplexPP
"""
In this example, we will show how to define a quantifier based on explicit loss minimization (ELM).
ELM is a family of quantification methods relying on structured output learning. In particular, we will
showcase how to instantiate SVM(Q) as proposed by `Barranquero et al. 2015
<https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_, and SVM(KLD) and SVM(nKLD) as proposed by
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
All ELM quantifiers rely on SVMperf for optimizing a structured loss function (Q, KLD, or nKLD). Since these are
not part of the original SVMperf package by Joachims, you have to first download the SVMperf package, apply the
patch svm-perf-quantification-ext.patch (provided with the QuaPy library), and compile the sources.
The script prepare_svmperf.sh does all of this for you. Simply run:
>>> ./prepare_svmperf.sh
Note that ELM quantifiers are nothing but a classify and count (CC) model instantiated with SVMperf as the
underlying classifier. E.g., SVM(Q) comes down to:
>>> CC(SVMperf(svmperf_base, loss='q'))
this means that ELM quantifiers are aggregative (since CC is an aggregative quantifier). QuaPy provides some helper
functions to simplify this; for example:
>>> newSVMQ(svmperf_base)
returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with SVMperf optimizing for Q).
Since we want to explore the losses, we will instead use newELM. For this example we will create a quantifier for
tweet sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary
classifier, our quantifier will be binary as well. We will use a one-vs-all approach to work in the multiclass setting.
For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
"""
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
quantifier = newOneVsAll(newELM())
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
# load a ternary dataset
train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
"""
model selection:
We explore the classifier's loss and the classifier's C hyperparameters.
Since our model is actually an instance of OneVsAllAggregative, we need to add the prefix "binary_quantifier", and
since our binary quantifier is an instance of CC, we need to add the prefix "classifier".
"""
param_grid = {
'binary_quantifier__classifier__loss': ['q', 'kld', 'mae'], # classifier-dependent hyperparameter
'binary_quantifier__classifier__C': [0.01, 1, 100], # classifier-dependent hyperparameter
}
print('starting model selection')
model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
quantifier = model_selection.fit(train_modsel).best_model()
print('training on the whole training set')
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
quantifier.fit(train)
# evaluation
mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
print(f'MAE = {mae:.4f}')

View File

@@ -1,6 +1,6 @@
 import quapy as qp
-from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric
-from quapy.method.base import getOneVsAll
+from quapy.method.aggregative import MS2
+from quapy.method.base import newOneVsAll
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import USimplexPP
 from sklearn.linear_model import LogisticRegression
@@ -22,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case
 some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
 during evaluation).
 """
-quantifier = getOneVsAll(MS2(LogisticRegression()))
+quantifier = newOneVsAll(MS2(LogisticRegression()))
 print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
 # load a ternary dataset

View File

@@ -1,54 +0,0 @@
import quapy as qp
from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric, SVMQ
from quapy.method.base import getOneVsAll
from quapy.model_selection import GridSearchQ
from quapy.protocol import USimplexPP
from sklearn.linear_model import LogisticRegression
import numpy as np
"""
In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral,
and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes.
"""
qp.environ['SAMPLE_SIZE'] = 100
qp.environ['N_JOBS'] = -1
qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
"""
Any binary quantifier can be turned into a single-label quantifier by means of the getOneVsAll function.
This function returns an instance of a OneVsAll quantifier: it returns the subclass OneVsAllGeneric
when the quantifier is an instance of BaseQuantifier, and OneVsAllAggregative when the quantifier is
an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all cases, using OneVsAllAggregative has
some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
during evaluation).
"""
quantifier = getOneVsAll(SVMQ())
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
# load a ternary dataset
train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
"""
model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the
artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors
from the unit simplex. The priors are sampled using the Kraemer algorithm. Note that this is in contrast to the
standard APP protocol, which instead explores a predefined grid of prevalence values.
"""
param_grid = {
'binary_quantifier__classifier__C': np.logspace(-2,2,5), # classifier-dependent hyperparameter
}
print('starting model selection')
model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
quantifier = model_selection.fit(train_modsel).best_model()
print('training on the whole training set')
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
quantifier.fit(train)
# evaluation
mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
print(f'MAE = {mae:.4f}')

View File

@@ -1,5 +1,7 @@
 import random
+import shutil
 import subprocess
+import tempfile
 from os import remove, makedirs
 from os.path import join, exists
 from subprocess import PIPE, STDOUT
@@ -23,29 +25,34 @@ class SVMperf(BaseEstimator, ClassifierMixin):
     :param C: trade-off between training error and margin (default 0.01)
     :param verbose: set to True to print svm-perf std outputs
     :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
+    :param host_folder: directory where to store the trained model; set to None (default) to use a tmp directory
+        (temporary directories are automatically deleted)
     """

     # losses with their respective codes in svm_perf implementation
     valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}

-    def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01'):
+    def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None):
         assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path'
         self.svmperf_base = svmperf_base
         self.C = C
         self.verbose = verbose
         self.loss = loss
+        self.host_folder = host_folder

-    def set_params(self, **parameters):
-        """
-        Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported
-
-        :param parameters: a `**kwargs` dictionary `{'C': <float>}`
-        """
-        assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
-        self.C = parameters['C']
-
-    def get_params(self, deep=True):
-        return {'C': self.C}
+    # def set_params(self, **parameters):
+    #     """
+    #     Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported
+    #
+    #     :param parameters: a `**kwargs` dictionary `{'C': <float>}`
+    #     """
+    #     assert sorted(list(parameters.keys())) == ['C', 'loss'], \
+    #         'currently, only the C and loss parameters are supported'
+    #     self.C = parameters.get('C', self.C)
+    #     self.loss = parameters.get('loss', self.loss)
+    #
+    # def get_params(self, deep=True):
+    #     return {'C': self.C, 'loss': self.loss}

     def fit(self, X, y):
         """
@@ -68,14 +75,14 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         local_random = random.Random()
         # this would allow to run parallel instances of predict
-        random_code = '-'.join(str(local_random.randint(0,1000000)) for _ in range(5))
-        # self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code)
-        # tmp dir are removed after the fit terminates in multiprocessing... moving to regular directories + __del__
-        self.tmpdir = '.svmperf-' + random_code
+        random_code = 'svmperfprocess'+'-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
+        if self.host_folder is None:
+            # tmp dirs are removed after fit terminates when multiprocessing...
+            self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code).name
+        else:
+            self.tmpdir = join(self.host_folder, '.' + random_code)
         makedirs(self.tmpdir, exist_ok=True)
-        # self.model = join(self.tmpdir.name, 'model-'+random_code)
-        # traindat = join(self.tmpdir.name, f'train-{random_code}.dat')

         self.model = join(self.tmpdir, 'model-'+random_code)
         traindat = join(self.tmpdir, f'train-{random_code}.dat')
@@ -123,8 +130,6 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         # in order to allow for parallel runs of predict, a random code is assigned
         local_random = random.Random()
         random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
-        # predictions_path = join(self.tmpdir.name, 'predictions'+random_code+'.dat')
-        # testdat = join(self.tmpdir.name, 'test'+random_code+'.dat')
         predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat')
         testdat = join(self.tmpdir, 'test' + random_code + '.dat')
         dump_svmlight_file(X, y, testdat, zero_based=False)
@@ -145,5 +150,5 @@ class SVMperf(BaseEstimator, ClassifierMixin):

     def __del__(self):
         if hasattr(self, 'tmpdir'):
-            pass  # shutil.rmtree(self.tmpdir, ignore_errors=True)
+            shutil.rmtree(self.tmpdir, ignore_errors=True)
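
For context, the host_folder parameter added above changes where SVMperf hosts its model and data files. A minimal
sketch of the two modes (the paths below are placeholders, not part of the commit):

from quapy.classification.svmperf import SVMperf

# default: files are written to a temporary directory (removed in __del__)
svm = SVMperf('../svm_perf_quantification', C=0.01, loss='kld')

# host_folder: files are kept under a user-chosen directory instead
svm_hosted = SVMperf('../svm_perf_quantification', C=0.01, loss='kld', host_folder='./svmperf_models')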

View File

@@ -3,15 +3,6 @@ from . import base
 from . import meta
 from . import non_aggregative

-EXPLICIT_LOSS_MINIMIZATION_METHODS = {
-    aggregative.ELM,
-    aggregative.SVMQ,
-    aggregative.SVMAE,
-    aggregative.SVMKLD,
-    aggregative.SVMRAE,
-    aggregative.SVMNKLD
-}
-
 AGGREGATIVE_METHODS = {
     aggregative.CC,
     aggregative.ACC,
@@ -26,7 +17,7 @@ AGGREGATIVE_METHODS = {
     aggregative.MAX,
     aggregative.MS,
     aggregative.MS2,
-} | EXPLICIT_LOSS_MINIMIZATION_METHODS
+}

 NON_AGGREGATIVE_METHODS = {

View File

@@ -870,146 +870,155 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
         return r.x


-class ELM(AggregativeQuantifier, BinaryQuantifier):
-    """
-    Class of Explicit Loss Minimization (ELM) quantifiers.
-    Quantifiers based on ELM represent a family of methods based on structured output learning;
-    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
-    measure. This implementation relies on
-    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
-    learning algorithm, which has to be installed and patched for the purpose (see this
-    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
-
-    :param classifier: an instance of `SVM perf` or None
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, loss='01', **kwargs):
-        self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME']
-        self.loss = loss
-        self.kwargs = kwargs
-        assert classifier is None or isinstance(classifier, SVMperf), \
-            'param error "classifier": instances of ELM can only be instantiated with classifier SVMperf. ' \
-            'This parameter should either be an instance of SVMperf or None, in which case an SVMperf object ' \
-            'will be instantiaded using "svmperf_base" and "loss"'
-        if classifier is None:
-            self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs)
-        else:
-            if classifier.loss != loss:
-                print(f'[warning]: the loss of the SVMperf object passed to arg "classifier" ({classifier.loss}) '
-                      f'does not coincide with arg "loss" ({loss}); the latter will be ignored')
-            self.classifier = classifier
-
-    def fit(self, data: LabelledCollection, fit_classifier=True):
-        self._check_binary(data, self.__class__.__name__)
-        assert fit_classifier, 'the method requires that fit_classifier=True'
-        self.classifier.fit(data.instances, data.labels)
-        return self
-
-    def aggregate(self, classif_predictions: np.ndarray):
-        return F.prevalence_from_labels(classif_predictions, self.classes_)
-
-    def classify(self, X, y=None):
-        return self.classifier.predict(X)
-
-
-class SVMQ(ELM):
-    """
-    SVM(Q), which attempts to minimize the `Q` loss combining a classification-oriented loss and a
-    quantification-oriented loss, as proposed by
-    `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='q', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
-
-
-class SVMKLD(ELM):
-    """
-    SVM(KLD), which attempts to minimize the Kullback-Leibler Divergence as proposed by
-    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='kld', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
-
-
-class SVMNKLD(ELM):
-    """
-    SVM(NKLD), which attempts to minimize a version of the the Kullback-Leibler Divergence normalized
-    via the logistic function, as proposed by
-    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='nkld', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
-
-
-class SVMAE(ELM):
-    """
-    SVM(AE), which attempts to minimize Absolute Error as first used by
-    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='mae', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
-
-
-class SVMRAE(ELM):
-    """
-    SVM(RAE), which attempts to minimize Relative Absolute Error as first used by
-    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
-    Equivalent to:
-
-    >>> ELM(svmperf_base, loss='mrae', **kwargs)
-
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
-    """
-
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
+def newELM(svmperf_base=None, loss='01', C=1):
+    """
+    Explicit Loss Minimization (ELM) quantifiers.
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    if svmperf_base is None:
+        svmperf_base = qp.environ['SVMPERF_HOME']
+    assert svmperf_base is not None, \
+        'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment'
+    return CC(SVMperf(svmperf_base, loss=loss, C=C))
+
+
+def newSVMQ(svmperf_base=None, C=1):
+    """
+    SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a
+    classification-oriented loss and a quantification-oriented loss, as proposed by
+    `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='q', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='q', C=C)
+
+
+def newSVMKLD(svmperf_base=None, C=1):
+    """
+    SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    as proposed by `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='kld', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='kld', C=C)
+
+
+def newSVMNKLD(svmperf_base=None, C=1):
+    """
+    SVM(nKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    normalized via the logistic function, as proposed by
+    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='nkld', C=C)
+
+
+def newSVMAE(svmperf_base=None, C=1):
+    """
+    SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by
+    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='mae', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='mae', C=C)
+
+
+def newSVMRAE(svmperf_base=None, C=1):
+    """
+    SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as
+    first used by `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
+    Equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
+
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 0.01)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
+    """
+    return newELM(svmperf_base, loss='mrae', C=C)


 class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier):
@@ -1267,7 +1276,6 @@ ProbabilisticAdjustedClassifyAndCount = PACC
 ExpectationMaximizationQuantifier = EMQ
 SLD = EMQ
 HellingerDistanceY = HDy
-ExplicitLossMinimisation = ELM
 MedianSweep = MS
 MedianSweep2 = MS2
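
Taken together, these factory functions replace the deleted ELM class hierarchy. A minimal usage sketch, assuming
SVMPERF_HOME points to a patched SVMperf build (the path below is a placeholder):

import quapy as qp
from quapy.method.aggregative import newELM, newSVMQ

qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'  # placeholder path

svmq = newSVMQ()             # equivalent to CC(SVMperf(svmperf_base, loss='q', C=1))
svmkld = newELM(loss='kld')  # any loss in SVMperf.valid_losses can be requested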

View File

@@ -54,7 +54,7 @@ class OneVsAll:
         pass


-def getOneVsAll(binary_quantifier, n_jobs=None):
+def newOneVsAll(binary_quantifier, n_jobs=None):
     assert isinstance(binary_quantifier, BaseQuantifier), \
         f'{binary_quantifier} does not seem to be a Quantifier'
     if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
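
Since newOneVsAll dispatches on the type of its argument, a binary ELM quantifier (a CC instance under the hood)
gets wrapped in OneVsAllAggregative automatically. A brief sketch under the same assumptions as above:

from quapy.method.base import newOneVsAll
from quapy.method.aggregative import newELM

quantifier = newOneVsAll(newELM(loss='q'))  # yields OneVsAllAggregative, since CC is aggregative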