elm examples
This commit is contained in:
parent
4c74ff02a3
commit
505d2de823
|
@ -0,0 +1,72 @@
|
||||||
|
import quapy as qp
|
||||||
|
from quapy.method.aggregative import newELM
|
||||||
|
from quapy.method.base import newOneVsAll
|
||||||
|
from quapy.model_selection import GridSearchQ
|
||||||
|
from quapy.protocol import USimplexPP
|
||||||
|
|
||||||
|
"""
|
||||||
|
In this example, we will show how to define a quantifier based on explicit loss minimization (ELM).
|
||||||
|
ELM is a family of quantification methods relying on structured output learning. In particular, we will
|
||||||
|
showcase how to instantiate SVM(Q) as proposed by `Barranquero et al. 2015
|
||||||
|
<https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_, and SVM(KLD) and SVM(nKLD) as proposed by
|
||||||
|
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
|
||||||
|
|
||||||
|
All ELM quantifiers rely on SVMperf for optimizing a structured loss function (Q, KLD, or nKLD). Since these are
|
||||||
|
not part of the original SVMperf package by Joachims, you have to first download the SVMperf package, apply the
|
||||||
|
patch svm-perf-quantification-ext.patch (provided with QuaPy library), and compile the sources.
|
||||||
|
The script prepare_svmperf.sh does all the job. Simply run:
|
||||||
|
|
||||||
|
>>> ./prepare_svmperf.sh
|
||||||
|
|
||||||
|
Note that ELM quantifiers are nothing but a classify and count (CC) model instantiated with SVMperf as the
|
||||||
|
underlying classifier. E.g., SVM(Q) comes down to:
|
||||||
|
|
||||||
|
>>> CC(SVMperf(svmperf_base, loss='q'))
|
||||||
|
|
||||||
|
This means that ELM quantifiers are aggregative quantifiers (since CC is an aggregative quantifier). QuaPy provides some helper
|
||||||
|
functions to simplify this; for example:
|
||||||
|
|
||||||
|
>>> newSVMQ(svmperf_base)
|
||||||
|
|
||||||
|
returns an instance of SVM(Q) (i.e., an instance of CC properly set to work with SVMperf optimizing for Q).
|
||||||
|
|
||||||
|
Since we want to explore the losses, we will instead use newELM. For this example we will create a quantifier for tweet
|
||||||
|
sentiment analysis considering three classes: negative, neutral, and positive. Since SVMperf is a binary classifier,
|
||||||
|
our quantifier will be binary as well. We will use a one-vs-all approach to work in multiclass mode.
|
||||||
|
For more details about how one-vs-all works, we refer to the example "one_vs_all.py" and to the API documentation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 100
|
||||||
|
qp.environ['N_JOBS'] = -1
|
||||||
|
qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
|
||||||
|
|
||||||
|
quantifier = newOneVsAll(newELM())
|
||||||
|
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
|
||||||
|
|
||||||
|
# load a ternary dataset
|
||||||
|
train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
|
||||||
|
|
||||||
|
"""
|
||||||
|
model selection:
|
||||||
|
We explore the classifier's loss and the classifier's C hyperparameters.
|
||||||
|
Since our model is actually an instance of OneVsAllAggregative, we need to add the prefix "binary_quantifier", and
|
||||||
|
since our binary quantifier is an instance of CC, we need to add the prefix "classifier".
|
||||||
|
"""
|
||||||
|
param_grid = {
|
||||||
|
'binary_quantifier__classifier__loss': ['q', 'kld', 'mae'], # classifier-dependent hyperparameter
|
||||||
|
'binary_quantifier__classifier__C': [0.01, 1, 100], # classifier-dependent hyperparameter
|
||||||
|
}
|
||||||
|
print('starting model selection')
|
||||||
|
model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
|
||||||
|
quantifier = model_selection.fit(train_modsel).best_model()
|
||||||
|
|
||||||
|
print('training on the whole training set')
|
||||||
|
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
|
||||||
|
quantifier.fit(train)
|
||||||
|
|
||||||
|
# evaluation
|
||||||
|
mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
|
||||||
|
|
||||||
|
print(f'MAE = {mae:.4f}')
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric
|
from quapy.method.aggregative import MS2
|
||||||
from quapy.method.base import getOneVsAll
|
from quapy.method.base import newOneVsAll
|
||||||
from quapy.model_selection import GridSearchQ
|
from quapy.model_selection import GridSearchQ
|
||||||
from quapy.protocol import USimplexPP
|
from quapy.protocol import USimplexPP
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
@ -22,7 +22,7 @@ an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all case
|
||||||
some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
|
some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
|
||||||
during evaluation).
|
during evaluation).
|
||||||
"""
|
"""
|
||||||
quantifier = getOneVsAll(MS2(LogisticRegression()))
|
quantifier = newOneVsAll(MS2(LogisticRegression()))
|
||||||
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
|
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
|
||||||
|
|
||||||
# load a ternary dataset
|
# load a ternary dataset
|
||||||
|
|
|
@ -1,54 +0,0 @@
|
||||||
import quapy as qp
|
|
||||||
from quapy.method.aggregative import MS2, OneVsAllAggregative, OneVsAllGeneric, SVMQ
|
|
||||||
from quapy.method.base import getOneVsAll
|
|
||||||
from quapy.model_selection import GridSearchQ
|
|
||||||
from quapy.protocol import USimplexPP
|
|
||||||
from sklearn.linear_model import LogisticRegression
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
"""
|
|
||||||
In this example, we will create a quantifier for tweet sentiment analysis considering three classes: negative, neutral,
|
|
||||||
and positive. We will use a one-vs-all approach using a binary quantifier for demonstration purposes.
|
|
||||||
"""
|
|
||||||
|
|
||||||
qp.environ['SAMPLE_SIZE'] = 100
|
|
||||||
qp.environ['N_JOBS'] = -1
|
|
||||||
qp.environ['SVMPERF_HOME'] = '../svm_perf_quantification'
|
|
||||||
|
|
||||||
"""
|
|
||||||
Any binary quantifier can be turned into a single-label quantifier by means of getOneVsAll function.
|
|
||||||
This function returns an instance of OneVsAll quantifier. Actually, it either returns the subclass OneVsAllGeneric
|
|
||||||
when the quantifier is an instance of BaseQuantifier, and it returns OneVsAllAggregative when the quantifier is
|
|
||||||
an instance of AggregativeQuantifier. Although OneVsAllGeneric works in all cases, using OneVsAllAggregative has
|
|
||||||
some additional advantages (namely, all the advantages that AggregativeQuantifiers enjoy, i.e., faster predictions
|
|
||||||
during evaluation).
|
|
||||||
"""
|
|
||||||
quantifier = getOneVsAll(SVMQ())
|
|
||||||
print(f'the quantifier is an instance of {quantifier.__class__.__name__}')
|
|
||||||
|
|
||||||
# load a ternary dataset
|
|
||||||
train_modsel, val = qp.datasets.fetch_twitter('hcr', for_model_selection=True, pickle=True).train_test
|
|
||||||
|
|
||||||
"""
|
|
||||||
model selection: for this example, we are relying on the USimplexPP protocol, i.e., a variant of the
|
|
||||||
artificial-prevalence protocol that generates random samples (100 in this case) for randomly picked priors
|
|
||||||
from the unit simplex. The priors are sampled using the Kraemer algorithm. Note this is in contrast to the
|
|
||||||
standard APP protocol, that instead explores a prefixed grid of prevalence values.
|
|
||||||
"""
|
|
||||||
param_grid = {
|
|
||||||
'binary_quantifier__classifier__C': np.logspace(-2,2,5), # classifier-dependent hyperparameter
|
|
||||||
}
|
|
||||||
print('starting model selection')
|
|
||||||
model_selection = GridSearchQ(quantifier, param_grid, protocol=USimplexPP(val), verbose=True, refit=False)
|
|
||||||
quantifier = model_selection.fit(train_modsel).best_model()
|
|
||||||
|
|
||||||
print('training on the whole training set')
|
|
||||||
train, test = qp.datasets.fetch_twitter('hcr', for_model_selection=False, pickle=True).train_test
|
|
||||||
quantifier.fit(train)
|
|
||||||
|
|
||||||
# evaluation
|
|
||||||
mae = qp.evaluation.evaluate(quantifier, protocol=USimplexPP(test), error_metric='mae')
|
|
||||||
|
|
||||||
print(f'MAE = {mae:.4f}')
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
import random
|
import random
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import tempfile
|
||||||
from os import remove, makedirs
|
from os import remove, makedirs
|
||||||
from os.path import join, exists
|
from os.path import join, exists
|
||||||
from subprocess import PIPE, STDOUT
|
from subprocess import PIPE, STDOUT
|
||||||
|
@ -23,29 +25,34 @@ class SVMperf(BaseEstimator, ClassifierMixin):
|
||||||
:param C: trade-off between training error and margin (default 0.01)
|
:param C: trade-off between training error and margin (default 0.01)
|
||||||
:param verbose: set to True to print svm-perf std outputs
|
:param verbose: set to True to print svm-perf std outputs
|
||||||
:param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
|
:param loss: the loss to optimize for. Available losses are "01", "f1", "kld", "nkld", "q", "qacc", "qf1", "qgm", "mae", "mrae".
|
||||||
|
:param host_folder: directory where to store the trained model; set to None (default) for using a tmp directory
|
||||||
|
(temporary directories are automatically deleted)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# losses with their respective codes in svm_perf implementation
|
# losses with their respective codes in svm_perf implementation
|
||||||
valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
|
valid_losses = {'01':0, 'f1':1, 'kld':12, 'nkld':13, 'q':22, 'qacc':23, 'qf1':24, 'qgm':25, 'mae':26, 'mrae':27}
|
||||||
|
|
||||||
def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01'):
|
def __init__(self, svmperf_base, C=0.01, verbose=False, loss='01', host_folder=None):
|
||||||
assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path'
|
assert exists(svmperf_base), f'path {svmperf_base} does not seem to point to a valid path'
|
||||||
self.svmperf_base = svmperf_base
|
self.svmperf_base = svmperf_base
|
||||||
self.C = C
|
self.C = C
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.loss = loss
|
self.loss = loss
|
||||||
|
self.host_folder = host_folder
|
||||||
|
|
||||||
def set_params(self, **parameters):
|
# def set_params(self, **parameters):
|
||||||
"""
|
# """
|
||||||
Set the hyper-parameters for svm-perf. Currently, only the `C` parameter is supported
|
# Set the hyper-parameters for svm-perf. Currently, only the `C` and `loss` parameters are supported
|
||||||
|
#
|
||||||
:param parameters: a `**kwargs` dictionary `{'C': <float>}`
|
# :param parameters: a `**kwargs` dictionary `{'C': <float>}`
|
||||||
"""
|
# """
|
||||||
assert list(parameters.keys()) == ['C'], 'currently, only the C parameter is supported'
|
# assert sorted(list(parameters.keys())) == ['C', 'loss'], \
|
||||||
self.C = parameters['C']
|
# 'currently, only the C and loss parameters are supported'
|
||||||
|
# self.C = parameters.get('C', self.C)
|
||||||
def get_params(self, deep=True):
|
# self.loss = parameters.get('loss', self.loss)
|
||||||
return {'C': self.C}
|
#
|
||||||
|
# def get_params(self, deep=True):
|
||||||
|
# return {'C': self.C, 'loss': self.loss}
|
||||||
|
|
||||||
def fit(self, X, y):
|
def fit(self, X, y):
|
||||||
"""
|
"""
|
||||||
|
@ -68,14 +75,14 @@ class SVMperf(BaseEstimator, ClassifierMixin):
|
||||||
|
|
||||||
local_random = random.Random()
|
local_random = random.Random()
|
||||||
# this would allow to run parallel instances of predict
|
# this would allow to run parallel instances of predict
|
||||||
random_code = '-'.join(str(local_random.randint(0,1000000)) for _ in range(5))
|
random_code = 'svmperfprocess'+'-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
|
||||||
# self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code)
|
if self.host_folder is None:
|
||||||
# tmp dir are removed after the fit terminates in multiprocessing... moving to regular directories + __del__
|
# tmp dir are removed after the fit terminates in multiprocessing...
|
||||||
self.tmpdir = '.svmperf-' + random_code
|
self.tmpdir = tempfile.TemporaryDirectory(suffix=random_code).name
|
||||||
|
else:
|
||||||
|
self.tmpdir = join(self.host_folder, '.' + random_code)
|
||||||
makedirs(self.tmpdir, exist_ok=True)
|
makedirs(self.tmpdir, exist_ok=True)
|
||||||
|
|
||||||
# self.model = join(self.tmpdir.name, 'model-'+random_code)
|
|
||||||
# traindat = join(self.tmpdir.name, f'train-{random_code}.dat')
|
|
||||||
self.model = join(self.tmpdir, 'model-'+random_code)
|
self.model = join(self.tmpdir, 'model-'+random_code)
|
||||||
traindat = join(self.tmpdir, f'train-{random_code}.dat')
|
traindat = join(self.tmpdir, f'train-{random_code}.dat')
|
||||||
|
|
||||||
|
@ -123,8 +130,6 @@ class SVMperf(BaseEstimator, ClassifierMixin):
|
||||||
# in order to allow for parallel runs of predict, a random code is assigned
|
# in order to allow for parallel runs of predict, a random code is assigned
|
||||||
local_random = random.Random()
|
local_random = random.Random()
|
||||||
random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
|
random_code = '-'.join(str(local_random.randint(0, 1000000)) for _ in range(5))
|
||||||
# predictions_path = join(self.tmpdir.name, 'predictions'+random_code+'.dat')
|
|
||||||
# testdat = join(self.tmpdir.name, 'test'+random_code+'.dat')
|
|
||||||
predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat')
|
predictions_path = join(self.tmpdir, 'predictions' + random_code + '.dat')
|
||||||
testdat = join(self.tmpdir, 'test' + random_code + '.dat')
|
testdat = join(self.tmpdir, 'test' + random_code + '.dat')
|
||||||
dump_svmlight_file(X, y, testdat, zero_based=False)
|
dump_svmlight_file(X, y, testdat, zero_based=False)
|
||||||
|
@ -145,5 +150,5 @@ class SVMperf(BaseEstimator, ClassifierMixin):
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
if hasattr(self, 'tmpdir'):
|
if hasattr(self, 'tmpdir'):
|
||||||
pass # shutil.rmtree(self.tmpdir, ignore_errors=True)
|
shutil.rmtree(self.tmpdir, ignore_errors=True)
|
||||||
|
|
||||||
|
|
|
@ -3,15 +3,6 @@ from . import base
|
||||||
from . import meta
|
from . import meta
|
||||||
from . import non_aggregative
|
from . import non_aggregative
|
||||||
|
|
||||||
EXPLICIT_LOSS_MINIMIZATION_METHODS = {
|
|
||||||
aggregative.ELM,
|
|
||||||
aggregative.SVMQ,
|
|
||||||
aggregative.SVMAE,
|
|
||||||
aggregative.SVMKLD,
|
|
||||||
aggregative.SVMRAE,
|
|
||||||
aggregative.SVMNKLD
|
|
||||||
}
|
|
||||||
|
|
||||||
AGGREGATIVE_METHODS = {
|
AGGREGATIVE_METHODS = {
|
||||||
aggregative.CC,
|
aggregative.CC,
|
||||||
aggregative.ACC,
|
aggregative.ACC,
|
||||||
|
@ -26,7 +17,7 @@ AGGREGATIVE_METHODS = {
|
||||||
aggregative.MAX,
|
aggregative.MAX,
|
||||||
aggregative.MS,
|
aggregative.MS,
|
||||||
aggregative.MS2,
|
aggregative.MS2,
|
||||||
} | EXPLICIT_LOSS_MINIMIZATION_METHODS
|
}
|
||||||
|
|
||||||
|
|
||||||
NON_AGGREGATIVE_METHODS = {
|
NON_AGGREGATIVE_METHODS = {
|
||||||
|
|
|
@ -870,146 +870,155 @@ class DistributionMatching(AggregativeProbabilisticQuantifier):
|
||||||
return r.x
|
return r.x
|
||||||
|
|
||||||
|
|
||||||
class ELM(AggregativeQuantifier, BinaryQuantifier):
|
def newELM(svmperf_base=None, loss='01', C=1):
|
||||||
"""
|
"""
|
||||||
Class of Explicit Loss Minimization (ELM) quantifiers.
|
Explicit Loss Minimization (ELM) quantifiers.
|
||||||
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
measure. This implementation relies on
|
measure. This implementation relies on
|
||||||
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
learning algorithm, which has to be installed and patched for the purpose (see this
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is equivalent to:
|
||||||
|
|
||||||
:param classifier: an instance of `SVM perf` or None
|
>>> CC(SVMperf(svmperf_base, loss, C))
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
:param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
|
:param loss: the loss to optimize (see :attr:`quapy.classification.svmperf.SVMperf.valid_losses`)
|
||||||
:param kwargs: rest of SVM perf's parameters
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
"""
|
"""
|
||||||
|
if svmperf_base is None:
|
||||||
def __init__(self, classifier=None, svmperf_base=None, loss='01', **kwargs):
|
svmperf_base = qp.environ['SVMPERF_HOME']
|
||||||
self.svmperf_base = svmperf_base if svmperf_base is not None else qp.environ['SVMPERF_HOME']
|
assert svmperf_base is not None, \
|
||||||
self.loss = loss
|
'param svmperf_base was not specified, and the variable SVMPERF_HOME has not been set in the environment'
|
||||||
self.kwargs = kwargs
|
return CC(SVMperf(svmperf_base, loss=loss, C=C))
|
||||||
assert classifier is None or isinstance(classifier, SVMperf), \
|
|
||||||
'param error "classifier": instances of ELM can only be instantiated with classifier SVMperf. ' \
|
|
||||||
'This parameter should either be an instance of SVMperf or None, in which case an SVMperf object ' \
|
|
||||||
'will be instantiaded using "svmperf_base" and "loss"'
|
|
||||||
if classifier is None:
|
|
||||||
self.classifier = SVMperf(self.svmperf_base, loss=self.loss, **self.kwargs)
|
|
||||||
else:
|
|
||||||
if classifier.loss != loss:
|
|
||||||
print(f'[warning]: the loss of the SVMperf object passed to arg "classifier" ({classifier.loss}) '
|
|
||||||
f'does not coincide with arg "loss" ({loss}); the latter will be ignored')
|
|
||||||
self.classifier = classifier
|
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True):
|
|
||||||
self._check_binary(data, self.__class__.__name__)
|
|
||||||
assert fit_classifier, 'the method requires that fit_classifier=True'
|
|
||||||
self.classifier.fit(data.instances, data.labels)
|
|
||||||
return self
|
|
||||||
|
|
||||||
def aggregate(self, classif_predictions: np.ndarray):
|
|
||||||
return F.prevalence_from_labels(classif_predictions, self.classes_)
|
|
||||||
|
|
||||||
def classify(self, X, y=None):
|
|
||||||
return self.classifier.predict(X)
|
|
||||||
|
|
||||||
|
|
||||||
class SVMQ(ELM):
|
def newSVMQ(svmperf_base=None, C=1):
|
||||||
"""
|
"""
|
||||||
SVM(Q), which attempts to minimize the `Q` loss combining a classification-oriented loss and a
|
SVM(Q) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the `Q` loss combining a
|
||||||
quantification-oriented loss, as proposed by
|
classification-oriented loss and a quantification-oriented loss, as proposed by
|
||||||
`Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.
|
`Barranquero et al. 2015 <https://www.sciencedirect.com/science/article/pii/S003132031400291X>`_.
|
||||||
Equivalent to:
|
Equivalent to:
|
||||||
|
|
||||||
>>> ELM(svmperf_base, loss='q', **kwargs)
|
>>> CC(SVMperf(svmperf_base, loss='q', C=C))
|
||||||
|
|
||||||
:param classifier: not used, added for compatibility
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
:param kwargs: rest of SVM perf's parameters
|
measure. This implementation relies on
|
||||||
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
|
||||||
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
"""
|
"""
|
||||||
|
return newELM(svmperf_base, loss='q', C=C)
|
||||||
|
|
||||||
def __init__(self, classifier=None, svmperf_base=None, **kwargs):
|
def newSVMKLD(svmperf_base=None, C=1):
|
||||||
assert classifier == None, \
|
|
||||||
'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
|
|
||||||
super(SVMQ, self).__init__(svmperf_base, loss='q', **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class SVMKLD(ELM):
|
|
||||||
"""
|
"""
|
||||||
SVM(KLD), which attempts to minimize the Kullback-Leibler Divergence as proposed by
|
SVM(KLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
|
||||||
|
as proposed by `Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
|
||||||
|
Equivalent to:
|
||||||
|
|
||||||
|
>>> CC(SVMperf(svmperf_base, loss='kld', C=C))
|
||||||
|
|
||||||
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
|
measure. This implementation relies on
|
||||||
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
|
||||||
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
|
"""
|
||||||
|
return newELM(svmperf_base, loss='kld', C=C)
|
||||||
|
|
||||||
|
|
||||||
|
def newSVMKLD(svmperf_base=None, C=1):
|
||||||
|
"""
|
||||||
|
SVM(NKLD) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Kullback-Leibler Divergence
|
||||||
|
normalized via the logistic function, as proposed by
|
||||||
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
|
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
|
||||||
Equivalent to:
|
Equivalent to:
|
||||||
|
|
||||||
>>> ELM(svmperf_base, loss='kld', **kwargs)
|
>>> CC(SVMperf(svmperf_base, loss='nkld', C=C))
|
||||||
|
|
||||||
:param classifier: not used, added for compatibility
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
:param kwargs: rest of SVM perf's parameters
|
measure. This implementation relies on
|
||||||
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
|
||||||
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
"""
|
"""
|
||||||
|
return newELM(svmperf_base, loss='nkld', C=C)
|
||||||
|
|
||||||
def __init__(self, classifier=None, svmperf_base=None, **kwargs):
|
def newSVMAE(svmperf_base=None, C=1):
|
||||||
assert classifier == None, \
|
|
||||||
'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
|
|
||||||
super(SVMKLD, self).__init__(svmperf_base, loss='kld', **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class SVMNKLD(ELM):
|
|
||||||
"""
|
"""
|
||||||
SVM(NKLD), which attempts to minimize a version of the the Kullback-Leibler Divergence normalized
|
SVM(AE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Absolute Error as first used by
|
||||||
via the logistic function, as proposed by
|
|
||||||
`Esuli et al. 2015 <https://dl.acm.org/doi/abs/10.1145/2700406>`_.
|
|
||||||
Equivalent to:
|
|
||||||
|
|
||||||
>>> ELM(svmperf_base, loss='nkld', **kwargs)
|
|
||||||
|
|
||||||
:param classifier: not used, added for compatibility
|
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
|
||||||
:param kwargs: rest of SVM perf's parameters
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, classifier=None, svmperf_base=None, **kwargs):
|
|
||||||
assert classifier == None, \
|
|
||||||
'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
|
|
||||||
super(SVMNKLD, self).__init__(svmperf_base, loss='nkld', **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class SVMAE(ELM):
|
|
||||||
"""
|
|
||||||
SVM(AE), which attempts to minimize Absolute Error as first used by
|
|
||||||
`Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
|
`Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
|
||||||
Equivalent to:
|
Equivalent to:
|
||||||
|
|
||||||
>>> ELM(svmperf_base, loss='mae', **kwargs)
|
>>> CC(SVMperf(svmperf_base, loss='mae', C=C))
|
||||||
|
|
||||||
:param classifier: not used, added for compatibility
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
:param kwargs: rest of SVM perf's parameters
|
measure. This implementation relies on
|
||||||
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
|
||||||
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
"""
|
"""
|
||||||
|
return newELM(svmperf_base, loss='mae', C=C)
|
||||||
|
|
||||||
def __init__(self, classifier=None, svmperf_base=None, **kwargs):
|
def newSVMAE(svmperf_base=None, C=1):
|
||||||
assert classifier == None, \
|
|
||||||
'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
|
|
||||||
super(SVMAE, self).__init__(svmperf_base, loss='mae', **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class SVMRAE(ELM):
|
|
||||||
"""
|
"""
|
||||||
SVM(RAE), which attempts to minimize Relative Absolute Error as first used by
|
SVM(RAE) is an Explicit Loss Minimization (ELM) quantifier set to optimize for the Relative Absolute Error as first
|
||||||
`Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
|
used by `Moreo and Sebastiani, 2021 <https://arxiv.org/abs/2011.02552>`_.
|
||||||
Equivalent to:
|
Equivalent to:
|
||||||
|
|
||||||
>>> ELM(svmperf_base, loss='mrae', **kwargs)
|
>>> CC(SVMperf(svmperf_base, loss='mrae', C=C))
|
||||||
|
|
||||||
:param classifier: not used, added for compatibility
|
Quantifiers based on ELM represent a family of methods based on structured output learning;
|
||||||
:param svmperf_base: path to the folder containing the binary files of `SVM perf`
|
these quantifiers rely on classifiers that have been optimized using a quantification-oriented loss
|
||||||
:param kwargs: rest of SVM perf's parameters
|
measure. This implementation relies on
|
||||||
|
`Joachims’ SVM perf <https://www.cs.cornell.edu/people/tj/svm_light/svm_perf.html>`_ structured output
|
||||||
|
learning algorithm, which has to be installed and patched for the purpose (see this
|
||||||
|
`script <https://github.com/HLT-ISTI/QuaPy/blob/master/prepare_svmperf.sh>`_).
|
||||||
|
This function is a wrapper around CC(SVMperf(svmperf_base, loss, C))
|
||||||
|
|
||||||
|
:param svmperf_base: path to the folder containing the binary files of `SVM perf`; if set to None (default)
|
||||||
|
this path will be obtained from qp.environ['SVMPERF_HOME']
|
||||||
|
:param C: trade-off between training error and margin (default 1)
|
||||||
|
:return: returns an instance of CC set to work with SVMperf (with loss and C set properly) as the
|
||||||
|
underlying classifier
|
||||||
"""
|
"""
|
||||||
|
return newELM(svmperf_base, loss='mrae', C=C)
|
||||||
def __init__(self, classifier=None, svmperf_base=None, **kwargs):
|
|
||||||
assert classifier == None, \
|
|
||||||
'param "classifier" should be None. SVMperf will be instantiated using "svmperf_base" path.'
|
|
||||||
super(SVMRAE, self).__init__(svmperf_base, loss='mrae', **kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier):
|
class ThresholdOptimization(AggregativeQuantifier, BinaryQuantifier):
|
||||||
|
@ -1267,7 +1276,6 @@ ProbabilisticAdjustedClassifyAndCount = PACC
|
||||||
ExpectationMaximizationQuantifier = EMQ
|
ExpectationMaximizationQuantifier = EMQ
|
||||||
SLD = EMQ
|
SLD = EMQ
|
||||||
HellingerDistanceY = HDy
|
HellingerDistanceY = HDy
|
||||||
ExplicitLossMinimisation = ELM
|
|
||||||
MedianSweep = MS
|
MedianSweep = MS
|
||||||
MedianSweep2 = MS2
|
MedianSweep2 = MS2
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,7 @@ class OneVsAll:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def getOneVsAll(binary_quantifier, n_jobs=None):
|
def newOneVsAll(binary_quantifier, n_jobs=None):
|
||||||
assert isinstance(binary_quantifier, BaseQuantifier), \
|
assert isinstance(binary_quantifier, BaseQuantifier), \
|
||||||
f'{binary_quantifier} does not seem to be a Quantifier'
|
f'{binary_quantifier} does not seem to be a Quantifier'
|
||||||
if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
|
if isinstance(binary_quantifier, qp.method.aggregative.AggregativeQuantifier):
|
||||||
|
|
Loading…
Reference in New Issue