elm examples
commit 505d2de823 (parent 4c74ff02a3)
@@ -0,0 +1,72 @@
+import quapy as qp
+from quapy.method.aggregative import newELM
+from quapy.method.base import newOneVsAll
+from quapy.model_selection import GridSearchQ
+from quapy.protocol import USimplexPP
+
+"""
+In this example, we will show how to define a quantifier based on explicit loss minimization (ELM).
+ELM is a family of quantification methods relying on structured output learning. In particular, we will
+showcase how to instantiate SVM(Q) as proposed by `Barranquero et al. 2015
+<https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_, and SVM(KLD) and SVM(nKLD) as proposed by
+`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
+
+All ELM quantifiers rely on SVMperf for optimizing a structured loss function (Q, KLD, or nKLD). Since these losses are
+not part of the original SVMperf package by Joachims, you first have to download the SVMperf package, apply the
+patch svm-perf-quantification-ext.patch (provided with the QuaPy library), and compile the sources.
+The script prepare_svmperf.sh takes care of all these steps. Simply run:
+
+>>> ./prepare_svmperf.sh
+
+Note that ELM quantifiers are nothing but a classify and count (CC) model instantiated with SVMperf as the
+underlying classifier. E.g., SVM(Q) comes down to:
+
+>>> CC(SVMperf(svmperf_base, loss='q'))
+
+This means that ELM methods are aggregative quantifiers (since CC is an aggregative quantifier). QuaPy provides some
+helper functions to simplify this; for example:
+
+>>> newSVMQ(svmperf_base)
+
+returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with SVMperf optimizing for Q).
+
+Since we want to explore the losses, we will instead use newELM. For this example, we will create a quantifier for
+tweet sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary
+classifier, our quantifier will be binary as well. We will use a one-vs-all approach to work in the multiclass setting.
+For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
+"""
+
+qp.environ['SAMPLE_SIZE'] = 100
+qp.environ['N_JOBS'] = -1
+qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
+
+quantifier = newOneVsAll(newELM())
+print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
+
+# load a ternary dataset
+train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
+
+"""
+model selection:
+We explore the classifier's loss and the classifier's C hyperparameters.
+Since our model is actually an instance of OneVsAllAggregative, we need to add the prefix "binary_quantifier", and
+since our binary quantifier is an instance of CC, we need to add the prefix "classifier".
+"""
+param_grid = {
+    'binary_quantifier__classifier__loss': ['q', 'kld', 'mae'],  # classifier-dependent hyperparameter
+    'binary_quantifier__classifier__C': [0.01, 1, 100],  # classifier-dependent hyperparameter
+}
+print('starting model selection')
+model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
+quantifier = model_selection.fit(train_modsel).best_model()
+
+print('training on the whole training set')
+train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
+quantifier.fit(train)
+
+# evaluation
+mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
+
+print(f'MAE = {mae:.4f}')
+
+
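For reference, the helper mentioned in the docstring can build SVM(Q) directly when there is no need to explore
losses. A minimal sketch, assuming SVMPERF_HOME is set as in the example above and that `train` is the training
LabelledCollection:

>>> from quapy.method.aggregative import newSVMQ
>>> from quapy.method.base import newOneVsAll
>>> quantifier = newOneVsAll(newSVMQ())  # one binary SVM(Q) per class
>>> quantifier.fit(train)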
@@ -1,6 +1,6 @@
 import quapy as qp
-from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric
-from quapy.method.base import getOneVsAll
+from quapy.method.aggregative import MS2
+from quapy.method.base import newOneVsAll
 from quapy.model_selection import GridSearchQ
 from quapy.protocol import USimplexPP
 from sklearn.linear_model import LogisticRegression
@@ -22,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case
 some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
 during evaluation).
 """
-quantifier = getOneVsAll(MS2(LogisticRegression()))
+quantifier = newOneVsAll(MS2(LogisticRegression()))
 print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
 
 # load a ternary dataset
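The dispatch described in the docstring above (OneVsAllAggregative for aggregative quantifiers, OneVsAllGeneric
otherwise) can be checked directly; a small sketch using the names imported in this example:

>>> from quapy.method.aggregative import MS2, OneVsAllAggregative
>>> from quapy.method.base import newOneVsAll
>>> from sklearn.linear_model import LogisticRegression
>>> ova = newOneVsAll(MS2(LogisticRegression()))
>>> isinstance(ova, OneVsAllAggregative)  # MS2 is aggregative, so the faster subclass is chosen
True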
@@ -1,54 +0,0 @@
-import quapy as qp
-from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric, SVMQ
-from quapy.method.base import getOneVsAll
-from quapy.model_selection import GridSearchQ
-from quapy.protocol import USimplexPP
-from sklearn.linear_model import LogisticRegression
-import numpy as np
-
-"""
-In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral,
-and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes.
-"""
-
-qp.environ['SAMPLE_SIZE'] = 100
-qp.environ['N_JOBS'] = -1
-qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
-
-"""
-Any binary quantifier can be turned into a single-label quantifier by means of getOneVsAll function.
-This function returns an instance of OneVsAll quantifier. Actually, it either returns the subclass OneVsAllGeneric
-when the quantifier is an instance of BaseQuantifier, and it returns OneVsAllAggregative when the quantifier is
-an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all cases, using OneVsAllAggregative has
-some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
-during evaluation).
-"""
-quantifier = getOneVsAll(SVMQ())
-print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
-
-# load a ternary dataset
-train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
-
-"""
-model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the
-artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors
-from the unit simplex. The priors are sampled using the Kraemer algorithm. Note this is in contrast to the
-standard APP protocol, that instead explores a prefixed grid of prevalence values.
-"""
-param_grid = {
-    'binary_quantifier__classifier__C': np.logspace(-2,2,5),  # classifier-dependent hyperparameter
-}
-print('starting model selection')
-model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
-quantifier = model_selection.fit(train_modsel).best_model()
-
-print('training on the whole training set')
-train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
-quantifier.fit(train)
-
-# evaluation
-mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
-
-print(f'MAE = {mae:.4f}')
-
-
@@ -1,5 +1,7 @@
 import random
 import shutil
 import subprocess
+import tempfile
+from os import remove, makedirs
 from os.path import join, exists
 from subprocess import PIPE, STDOUT
@@ -23,29 +25,34 @@ class SVMperf(BaseEstimator, ClassifierMixin):
     :param C: trade-off between training error and margin (default 0.01)
     :param verbose: set to True to print svm-perf std outputs
     :param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
+    :param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory
+        (temporary directories are automatically deleted)
     """
 
     # losses with their respective codes in svm_perf implementation
     valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
 
-    def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01'):
+    def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None):
         assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path'
         self.svmperf_base = svmperf_base
         self.C = C
         self.verbose = verbose
         self.loss = loss
+        self.host_folder = host_folder
 
-    def set_params(self, **parameters):
-        """
-        Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported
-
-        :param parameters: a `**kwargs` dictionary `{'C': <float>}`
-        """
-        assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
-        self.C = parameters['C']
-
-    def get_params(self, deep=True):
-        return {'C': self.C}
+    # def set_params(self, **parameters):
+    #     """
+    #     Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported
+    #
+    #     :param parameters: a `**kwargs` dictionary `{'C': <float>}`
+    #     """
+    #     assert sorted(list(parameters.keys())) == ['C', 'loss'], \
+    #         'currently, only the C and loss parameters are supported'
+    #     self.C = parameters.get('C', self.C)
+    #     self.loss = parameters.get('loss', self.loss)
+    #
+    # def get_params(self, deep=True):
+    #     return {'C': self.C, 'loss': self.loss}
 
     def fit(self, X, y):
         """
@@ -68,14 +75,14 @@ class SVMperf(BaseEstimator, ClassifierMixin):
 
         local_random = random.Random()
         # this would allow to run parallel instances of predict
-        random_code = '-'.join(str(local_random.randint(0,1000000)) for _ in range(5))
-        # self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code)
-        # tmp dir are removed after the fit terminates in multiprocessing... moving to regular directories + __del__
-        self.tmpdir = '.svmperf-' + random_code
+        random_code = 'svmperfprocess'+'-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
+        if self.host_folder is None:
+            # tmp dirs are removed after the fit terminates in multiprocessing...
+            self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code).name
+        else:
+            self.tmpdir = join(self.host_folder, '.' + random_code)
+            makedirs(self.tmpdir, exist_ok=True)
 
-        # self.model = join(self.tmpdir.name, 'model-'+random_code)
-        # traindat = join(self.tmpdir.name, f'train-{random_code}.dat')
         self.model = join(self.tmpdir, 'model-'+random_code)
         traindat = join(self.tmpdir, f'train-{random_code}.dat')
 
@@ -123,8 +130,6 @@ class SVMperf(BaseEstimator, ClassifierMixin):
         # in order to allow for parallel runs of predict, a random code is assigned
         local_random = random.Random()
         random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
-        # predictions_path = join(self.tmpdir.name, 'predictions'+random_code+'.dat')
-        # testdat = join(self.tmpdir.name, 'test'+random_code+'.dat')
         predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat')
         testdat = join(self.tmpdir, 'test' + random_code + '.dat')
         dump_svmlight_file(X, y, testdat, zero_based=False)
@@ -145,5 +150,5 @@ class SVMperf(BaseEstimator, ClassifierMixin):
 
     def __del__(self):
         if hasattr(self, 'tmpdir'):
-            pass # shutil.rmtree(self.tmpdir, ignore_errors=True)
+            shutil.rmtree(self.tmpdir, ignore_errors=True)
 
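The new host_folder parameter lets callers pin SVMperf's working files to a known directory, which is useful when
temporary directories get cleaned up under multiprocessing. A minimal sketch, assuming the patched SVMperf binaries
have been compiled, and assuming the import path and a predict method as found in QuaPy's codebase (hypothetical
paths throughout):

>>> from quapy.classification.svmperf import SVMperf
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=200, n_features=50, random_state=0)
>>> clf = SVMperf('../svm_perf_quantification', C=1, loss='q', host_folder='./svmperf-runs')
>>> clf.fit(X, y)  # model and train files land under ./svmperf-runs/.svmperfprocess-<random code>
>>> predictions = clf.predict(X)  # working files are removed when the object is deleted (__del__)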
@@ -3,15 +3,6 @@ from . import base
 from . import meta
 from . import non_aggregative
 
-EXPLICIT_LOSS_MINIMIZATION_METHODS = {
-    aggregative.ELM,
-    aggregative.SVMQ,
-    aggregative.SVMAE,
-    aggregative.SVMKLD,
-    aggregative.SVMRAE,
-    aggregative.SVMNKLD
-}
-
 AGGREGATIVE_METHODS = {
     aggregative.CC,
     aggregative.ACC,
@@ -26,7 +17,7 @@ AGGREGATIVE_METHODS = {
     aggregative.MAX,
     aggregative.MS,
     aggregative.MS2,
-} | EXPLICIT_LOSS_MINIMIZATION_METHODS
+}
 
 
 NON_AGGREGATIVE_METHODS = {
@@ -870,146 +870,155 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
         return r.x
 
 
-class ELM(AggregativeQuantifier, BinaryQuantifier):
+def newELM(svmperf_base=None, loss='01', C=1):
     """
-    Class of Explicit Loss Minimization (ELM) quantifiers.
+    Explicit Loss Minimization (ELM) quantifiers.
     Quantifiers based on ELM represent a family of methods based on structured output learning;
     these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
     measure. This implementation relies on
     `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
     learning algorithm, which has to be installed and patched for the purpose (see this
     `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is equivalent to:
+
+    >>> CC(SVMperf(svmperf_base, loss, C))
 
-    :param classifier: an instance of `SVM perf` or None
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
     :param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
-    :param kwargs: rest of SVM perf's parameters
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
-
-    def __init__(self, classifier=None, svmperf_base=None, loss='01', **kwargs):
-        self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME']
-        self.loss = loss
-        self.kwargs = kwargs
-        assert classifier is None or isinstance(classifier, SVMperf), \
-            'param error "classifier": instances of ELM can only be instantiated with classifier SVMperf. ' \
-            'This parameter should either be an instance of SVMperf or None, in which case an SVMperf object ' \
-            'will be instantiaded using "svmperf_base" and "loss"'
-        if classifier is None:
-            self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs)
-        else:
-            if classifier.loss != loss:
-                print(f'[warning]: the loss of the SVMperf object passed to arg "classifier" ({classifier.loss}) '
-                      f'does not coincide with arg "loss" ({loss}); the latter will be ignored')
-            self.classifier = classifier
-
-    def fit(self, data: LabelledCollection, fit_classifier=True):
-        self._check_binary(data, self.__class__.__name__)
-        assert fit_classifier, 'the method requires that fit_classifier=True'
-        self.classifier.fit(data.instances, data.labels)
-        return self
-
-    def aggregate(self, classif_predictions: np.ndarray):
-        return F.prevalence_from_labels(classif_predictions, self.classes_)
-
-    def classify(self, X, y=None):
-        return self.classifier.predict(X)
+    if svmperf_base is None:
+        svmperf_base = qp.environ['SVMPERF_HOME']
+    assert svmperf_base is not None, \
+        'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment'
+    return CC(SVMperf(svmperf_base, loss=loss, C=C))
 
 
-class SVMQ(ELM):
+def newSVMQ(svmperf_base=None, C=1):
     """
-    SVM(Q), which attempts to minimize the `Q` loss combining a classification-oriented loss and a
-    quantification-oriented loss, as proposed by
+    SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a
+    classification-oriented loss and a quantification-oriented loss, as proposed by
     `Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='q', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='q', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='q', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
 
 
-class SVMKLD(ELM):
+def newSVMKLD(svmperf_base=None, C=1):
     """
-    SVM(KLD), which attempts to minimize the Kullback-Leibler Divergence as proposed by
-    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
+    SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    as proposed by `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='kld', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='kld', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='kld', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
 
 
-class SVMNKLD(ELM):
+def newSVMNKLD(svmperf_base=None, C=1):
     """
-    SVM(NKLD), which attempts to minimize a version of the the Kullback-Leibler Divergence normalized
-    via the logistic function, as proposed by
-    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
+    SVM(nKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
+    normalized via the logistic function, as proposed by
+    `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='nkld', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='nkld', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
 
 
-class SVMAE(ELM):
+def newSVMAE(svmperf_base=None, C=1):
     """
-    SVM(AE), which attempts to minimize Absolute Error as first used by
-    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
+    SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by
+    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='mae', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='mae', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='mae', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
 
 
-class SVMRAE(ELM):
+def newSVMRAE(svmperf_base=None, C=1):
     """
-    SVM(RAE), which attempts to minimize Relative Absolute Error as first used by
-    `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
+    SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first
+    used by `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
     Equivalent to:
 
-    >>> ELM(svmperf_base, loss='mrae', **kwargs)
+    >>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
 
-    :param classifier: not used, added for compatibility
-    :param svmperf_base: path to the folder containing the binary files of `SVM perf`
-    :param kwargs: rest of SVM perf's parameters
+    Quantifiers based on ELM represent a family of methods based on structured output learning;
+    these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
+    measure. This implementation relies on
+    `Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
+    learning algorithm, which has to be installed and patched for the purpose (see this
+    `script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
+    This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
+
+    :param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
+        this path will be obtained from qp.environ['SVMPERF_HOME']
+    :param C: trade-off between training error and margin (default 1)
+    :return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
+        underlying classifier
     """
+    return newELM(svmperf_base, loss='mrae', C=C)
 
-    def __init__(self, classifier=None, svmperf_base=None, **kwargs):
-        assert classifier == None, \
-            'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
-        super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
 
 
 class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier):
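After this refactoring, every ELM method is just CC over a purpose-configured SVMperf. A minimal sketch of the new
factory functions (hypothetical SVMPERF_HOME path):

>>> import quapy as qp
>>> from quapy.method.aggregative import newELM, newSVMQ
>>> qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
>>> svmq = newSVMQ()                    # shorthand for newELM(loss='q')
>>> svm_kld = newELM(loss='kld', C=10)  # any valid_losses entry can be plugged in
>>> svmq.__class__.__name__
'CC'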
@@ -1267,7 +1276,6 @@ ProbabilisticAdjustedClassifyAndCount = PACC
 ExpectationMaximizationQuantifier = EMQ
 SLD = EMQ
 HellingerDistanceY = HDy
-ExplicitLossMinimisation = ELM
 MedianSweep = MS
 MedianSweep2 = MS2
 
@@ -54,7 +54,7 @@ class OneVsAll:
         pass
 
 
-def getOneVsAll(binary_quantifier, n_jobs=None):
+def newOneVsAll(binary_quantifier, n_jobs=None):
     assert isinstance(binary_quantifier, BaseQuantifier), \
         f'{binary_quantifier} does not seem to be a Quantifier'
     if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):